Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-03 09:53:45 +00:00)

Merge branch 'main' into fix/issue-3185
Commit 9b3c041af0: 139 changed files with 16,350 additions and 825 deletions
.github/CODEOWNERS (2 changes, vendored)

@@ -2,4 +2,4 @@
 # These owners will be the default owners for everything in
 # the repo. Unless a later match takes precedence,
-* @ashwinb @yanxi0830 @hardikjshah @raghotham @ehhuang @leseb @bbrowning @reluctantfuturist @mattf @slekkala1 @franciscojavierarceo
+* @ashwinb @raghotham @ehhuang @leseb @bbrowning @mattf @franciscojavierarceo @cdoern
.github/actions/setup-typescript-client/action.yml (35 changes, vendored, new file)

@@ -0,0 +1,35 @@
+name: Setup TypeScript client
+description: Conditionally checkout and link llama-stack-client-typescript based on client-version
+inputs:
+  client-version:
+    description: 'Client version (latest or published)'
+    required: true
+
+outputs:
+  ts-client-path:
+    description: 'Path or version to use for TypeScript client'
+    value: ${{ steps.set-path.outputs.ts-client-path }}
+
+runs:
+  using: "composite"
+  steps:
+    - name: Checkout TypeScript client (latest)
+      if: ${{ inputs.client-version == 'latest' }}
+      uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+      with:
+        repository: llamastack/llama-stack-client-typescript
+        ref: main
+        path: .ts-client-checkout
+
+    - name: Set TS_CLIENT_PATH
+      id: set-path
+      shell: bash
+      run: |
+        if [ "${{ inputs.client-version }}" = "latest" ]; then
+          echo "ts-client-path=${{ github.workspace }}/.ts-client-checkout" >> $GITHUB_OUTPUT
+        elif [ "${{ inputs.client-version }}" = "published" ]; then
+          echo "ts-client-path=^0.3.2" >> $GITHUB_OUTPUT
+        else
+          echo "::error::Invalid client-version: ${{ inputs.client-version }}"
+          exit 1
+        fi
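A workflow consumes this composite action roughly as follows (a minimal sketch based on the usage added to integration-tests.yml below; the job name and checkout pin here are illustrative):

```yaml
jobs:
  integration:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5   # illustrative pin; real workflows pin a full SHA
      - name: Setup TypeScript client
        id: setup-ts-client
        uses: ./.github/actions/setup-typescript-client
        with:
          client-version: latest    # or 'published'
      - name: Use the resolved client path/version
        run: echo "TS client: ${{ steps.setup-ts-client.outputs.ts-client-path }}"
```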
.github/workflows/backward-compat.yml (12 changes, vendored)

@@ -27,7 +27,7 @@ jobs:
     steps:
       - name: Checkout PR branch
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
         with:
           fetch-depth: 0 # Need full history to access main branch

@@ -37,7 +37,7 @@ jobs:
           python-version: '3.12'

       - name: Install uv
-        uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
+        uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4
         with:
           enable-cache: true

@@ -151,7 +151,7 @@ jobs:
     steps:
       - name: Checkout PR branch
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
         with:
           fetch-depth: 0

@@ -236,7 +236,7 @@ jobs:
     steps:
       - name: Checkout PR branch
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
         with:
           fetch-depth: 0

@@ -405,7 +405,7 @@ jobs:
     steps:
       - name: Checkout PR branch
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
         with:
           fetch-depth: 0

@@ -415,7 +415,7 @@ jobs:
           python-version: '3.12'

       - name: Install uv
-        uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
+        uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4
         with:
           enable-cache: true
.github/workflows/changelog.yml (4 changes, vendored)

@@ -17,13 +17,13 @@ jobs:
       pull-requests: write # for peter-evans/create-pull-request to create a PR
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+      - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
        with:
          ref: main
          fetch-depth: 0
      - run: |
          python ./scripts/gen-changelog.py
-      - uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e # v7.0.8
+      - uses: peter-evans/create-pull-request@84ae59a2cdc2258d6fa0732dd66352dddae2a412 # v7.0.9
        with:
          title: 'docs: update CHANGELOG.md for ${{ github.ref_name }}'
          commit-message: 'docs: update CHANGELOG.md for ${{ github.ref_name }}'
.github/workflows/conformance.yml (4 changes, vendored)

@@ -35,7 +35,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout PR Code
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
         with:
           fetch-depth: 0

@@ -59,7 +59,7 @@ jobs:
       # This allows us to diff the current changes against the previous state
       - name: Checkout Base Branch
         if: steps.skip-check.outputs.skip != 'true'
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
         with:
           ref: ${{ github.event.pull_request.base.ref }}
           path: 'base'
.github/workflows/install-script-ci.yml (4 changes, vendored)

@@ -16,14 +16,14 @@ jobs:
   lint:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 5.0.0
+      - uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0
       - name: Run ShellCheck on install.sh
         run: shellcheck scripts/install.sh
   smoke-test-on-dev:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

       - name: Install dependencies
         uses: ./.github/actions/setup-runner
.github/workflows/integration-auth-tests.yml (2 changes, vendored)

@@ -35,7 +35,7 @@ jobs:
     steps:
       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

       - name: Install dependencies
         uses: ./.github/actions/setup-runner

@@ -48,7 +48,7 @@ jobs:
     steps:
       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

       - name: Install dependencies
         uses: ./.github/actions/setup-runner
.github/workflows/integration-tests.yml (20 changes, vendored)

@@ -50,7 +50,7 @@ jobs:
       matrix: ${{ steps.set-matrix.outputs.matrix }}
     steps:
       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

       - name: Generate test matrix
         id: set-matrix

@@ -81,7 +81,7 @@ jobs:
     steps:
       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

       - name: Setup test environment
         if: ${{ matrix.config.allowed_clients == null || contains(matrix.config.allowed_clients, matrix.client) }}

@@ -93,11 +93,27 @@ jobs:
           suite: ${{ matrix.config.suite }}
           inference-mode: 'replay'

+      - name: Setup Node.js for TypeScript client tests
+        if: ${{ matrix.client == 'server' }}
+        uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0
+        with:
+          node-version: '20'
+          cache: 'npm'
+          cache-dependency-path: tests/integration/client-typescript/package-lock.json
+
+      - name: Setup TypeScript client
+        if: ${{ matrix.client == 'server' }}
+        id: setup-ts-client
+        uses: ./.github/actions/setup-typescript-client
+        with:
+          client-version: ${{ matrix.client-version }}
+
       - name: Run tests
         if: ${{ matrix.config.allowed_clients == null || contains(matrix.config.allowed_clients, matrix.client) }}
         uses: ./.github/actions/run-and-record-tests
         env:
           OPENAI_API_KEY: dummy
+          TS_CLIENT_PATH: ${{ steps.setup-ts-client.outputs.ts-client-path || '' }}
         with:
           stack-config: >-
             ${{ matrix.config.stack_config
@@ -37,7 +37,7 @@ jobs:
     steps:
       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

       - name: Install dependencies
         uses: ./.github/actions/setup-runner
.github/workflows/pre-commit.yml (4 changes, vendored)

@@ -22,7 +22,7 @@ jobs:
     steps:
       - name: Checkout code
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
         with:
           # For dependabot PRs, we need to checkout with a token that can push changes
           token: ${{ github.actor == 'dependabot[bot]' && secrets.GITHUB_TOKEN || github.token }}

@@ -46,7 +46,7 @@ jobs:
           cache-dependency-path: 'src/llama_stack_ui/'

       - name: Set up uv
-        uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
+        uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4

       - name: Install npm dependencies
         run: npm ci
.github/workflows/providers-build.yml (10 changes, vendored)

@@ -40,7 +40,7 @@ jobs:
       distros: ${{ steps.set-matrix.outputs.distros }}
     steps:
       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

       - name: Generate Distribution List
         id: set-matrix

@@ -59,7 +59,7 @@ jobs:
     steps:
       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

       - name: Install dependencies
         uses: ./.github/actions/setup-runner

@@ -93,7 +93,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

       - name: Install dependencies
         uses: ./.github/actions/setup-runner

@@ -106,7 +106,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

       - name: Install dependencies
         uses: ./.github/actions/setup-runner

@@ -146,7 +146,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

       - name: Install dependencies
         uses: ./.github/actions/setup-runner
.github/workflows/providers-list-deps.yml (8 changes, vendored)

@@ -36,7 +36,7 @@ jobs:
       distros: ${{ steps.set-matrix.outputs.distros }}
     steps:
       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

       - name: Generate Distribution List
         id: set-matrix

@@ -55,7 +55,7 @@ jobs:
     steps:
       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

       - name: Install dependencies
         uses: ./.github/actions/setup-runner

@@ -79,7 +79,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

       - name: Install dependencies
         uses: ./.github/actions/setup-runner

@@ -92,7 +92,7 @@ jobs:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

       - name: Install dependencies
         uses: ./.github/actions/setup-runner
.github/workflows/python-build-test.yml (4 changes, vendored)

@@ -21,10 +21,10 @@ jobs:
     steps:
       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

       - name: Install uv
-        uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
+        uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4
         with:
           python-version: ${{ matrix.python-version }}
           activate-environment: true

@@ -46,7 +46,7 @@ jobs:
           echo "::endgroup::"

       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
         with:
           fetch-depth: 0
.github/workflows/stainless-builds.yml (56 changes, vendored)

@@ -43,7 +43,41 @@ env:
   # Stainless organization dashboard

 jobs:
+  compute-branch:
+    runs-on: ubuntu-latest
+    outputs:
+      preview_branch: ${{ steps.compute.outputs.preview_branch }}
+      base_branch: ${{ steps.compute.outputs.base_branch }}
+      merge_branch: ${{ steps.compute.outputs.merge_branch }}
+    steps:
+      - name: Compute branch names
+        id: compute
+        run: |
+          HEAD_REPO="${{ github.event.pull_request.head.repo.full_name }}"
+          BASE_REPO="${{ github.repository }}"
+          BRANCH_NAME="${{ github.event.pull_request.head.ref }}"
+          FORK_OWNER="${{ github.event.pull_request.head.repo.owner.login }}"
+
+          if [ "$HEAD_REPO" != "$BASE_REPO" ]; then
+            # Fork PR: prefix with fork owner for isolation
+            if [ -z "$FORK_OWNER" ]; then
+              echo "Error: Fork PR detected but fork owner is empty" >&2
+              exit 1
+            fi
+            PREVIEW_BRANCH="preview/${FORK_OWNER}/${BRANCH_NAME}"
+            BASE_BRANCH="preview/base/${FORK_OWNER}/${BRANCH_NAME}"
+          else
+            # Same-repo PR
+            PREVIEW_BRANCH="preview/${BRANCH_NAME}"
+            BASE_BRANCH="preview/base/${BRANCH_NAME}"
+          fi
+
+          echo "preview_branch=${PREVIEW_BRANCH}" >> $GITHUB_OUTPUT
+          echo "base_branch=${BASE_BRANCH}" >> $GITHUB_OUTPUT
+          echo "merge_branch=${PREVIEW_BRANCH}" >> $GITHUB_OUTPUT
+
   preview:
+    needs: compute-branch
     if: github.event.action != 'closed'
     runs-on: ubuntu-latest
     permissions:

@@ -53,16 +87,14 @@ jobs:
       # Checkout the PR's code to access the OpenAPI spec and config files.
       # This is necessary to read the spec/config from the PR (including from forks).
       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
         with:
           repository: ${{ github.event.pull_request.head.repo.full_name }}
           ref: ${{ github.event.pull_request.head.sha }}
           fetch-depth: 2

       # This action builds preview SDKs from the OpenAPI spec changes and
       # posts/updates a comment on the PR with build results and links to the preview.
       - name: Run preview builds
-        uses: stainless-api/upload-openapi-spec-action/preview@32823b096b4319c53ee948d702d9052873af485f # 1.6.0
+        uses: stainless-api/upload-openapi-spec-action/preview@9133735bca5ce0a1df7d3b26e75364e26137a016 # 1.7.0
         with:
           stainless_api_key: ${{ secrets.STAINLESS_API_KEY }}
           org: ${{ env.STAINLESS_ORG }}

@@ -73,8 +105,11 @@ jobs:
           base_sha: ${{ github.event.pull_request.base.sha }}
           base_ref: ${{ github.event.pull_request.base.ref }}
           head_sha: ${{ github.event.pull_request.head.sha }}
+          branch: ${{ needs.compute-branch.outputs.preview_branch }}
+          base_branch: ${{ needs.compute-branch.outputs.base_branch }}

   merge:
+    needs: compute-branch
     if: github.event.action == 'closed' && github.event.pull_request.merged == true
     runs-on: ubuntu-latest
     permissions:

@@ -84,20 +119,20 @@ jobs:
       # Checkout the PR's code to access the OpenAPI spec and config files.
       # This is necessary to read the spec/config from the PR (including from forks).
       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
         with:
           repository: ${{ github.event.pull_request.head.repo.full_name }}
           ref: ${{ github.event.pull_request.head.sha }}
           fetch-depth: 2

       # Note that this only merges in changes that happened on the last build on
-      # preview/${{ github.head_ref }}. It's possible that there are OAS/config
-      # changes that haven't been built, if the preview-sdk job didn't finish
+      # the computed preview branch. It's possible that there are OAS/config
+      # changes that haven't been built, if the preview job didn't finish
       # before this step starts. In theory we want to wait for all builds
-      # against preview/${{ github.head_ref }} to complete, but assuming that
-      # the preview-sdk job happens before the PR merge, it should be fine.
+      # against the preview branch to complete, but assuming that
+      # the preview job happens before the PR merge, it should be fine.
       - name: Run merge build
-        uses: stainless-api/upload-openapi-spec-action/merge@32823b096b4319c53ee948d702d9052873af485f # 1.6.0
+        uses: stainless-api/upload-openapi-spec-action/merge@9133735bca5ce0a1df7d3b26e75364e26137a016 # 1.7.0
         with:
           stainless_api_key: ${{ secrets.STAINLESS_API_KEY }}
           org: ${{ env.STAINLESS_ORG }}

@@ -108,3 +143,4 @@ jobs:
           base_sha: ${{ github.event.pull_request.base.sha }}
           base_ref: ${{ github.event.pull_request.base.ref }}
           head_sha: ${{ github.event.pull_request.head.sha }}
+          merge_branch: ${{ needs.compute-branch.outputs.merge_branch }}
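As an illustration of the compute-branch logic above, hypothetical inputs would resolve as follows (the head ref matches this PR's branch; the fork owner name is an example, not taken from the diff):

Same-repo PR with head ref "fix/issue-3185":
  preview_branch = preview/fix/issue-3185
  base_branch    = preview/base/fix/issue-3185
  merge_branch   = preview/fix/issue-3185

Fork PR from owner "octocat" with head ref "fix/issue-3185":
  preview_branch = preview/octocat/fix/issue-3185
  base_branch    = preview/base/octocat/fix/issue-3185
  merge_branch   = preview/octocat/fix/issue-3185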
@@ -27,7 +27,7 @@ jobs:
       # container and point 'uv pip install' to the correct path...
     steps:
       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

       - name: Install dependencies
         uses: ./.github/actions/setup-runner
.github/workflows/test-external.yml (2 changes, vendored)

@@ -27,7 +27,7 @@ jobs:
       # container and point 'uv pip install' to the correct path...
     steps:
       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

       - name: Install dependencies
         uses: ./.github/actions/setup-runner
.github/workflows/ui-unit-tests.yml (2 changes, vendored)

@@ -26,7 +26,7 @@ jobs:
     steps:
       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

       - name: Setup Node.js
         uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0
.github/workflows/unit-tests.yml (2 changes, vendored)

@@ -36,7 +36,7 @@ jobs:
           - "3.13"
     steps:
       - name: Checkout repository
-        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+        uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0

       - name: Install dependencies
         uses: ./.github/actions/setup-runner
.gitignore (2 changes, vendored)

@@ -35,3 +35,5 @@ docs/static/imported-files/
 docs/docs/api-deprecated/
 docs/docs/api-experimental/
 docs/docs/api/
+tests/integration/client-typescript/node_modules/
+.ts-client-checkout/
@@ -9862,9 +9862,21 @@ components:
           title: Object
           default: vector_store.file
         attributes:
-          additionalProperties: true
+          additionalProperties:
+            anyOf:
+              - type: string
+                maxLength: 512
+              - type: number
+              - type: boolean
+            title: string | number | boolean
+          propertyNames:
+            type: string
+            maxLength: 64
           type: object
+          maxProperties: 16
           title: Attributes
+          description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers.
+          x-oaiTypeLabel: map
         chunking_strategy:
           oneOf:
             - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
@@ -24,7 +24,7 @@ https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `api_base` | `HttpUrl` | No | | Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com) |
+| `base_url` | `HttpUrl \| None` | No | | Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com/openai/v1) |
 | `api_version` | `str \| None` | No | | Azure API version for Azure (e.g., 2024-12-01-preview) |
 | `api_type` | `str \| None` | No | azure | Azure API type for Azure (e.g., azure) |

@@ -32,7 +32,7 @@ https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview
 ```yaml
 api_key: ${env.AZURE_API_KEY:=}
-api_base: ${env.AZURE_API_BASE:=}
+base_url: ${env.AZURE_API_BASE:=}
 api_version: ${env.AZURE_API_VERSION:=}
 api_type: ${env.AZURE_API_TYPE:=}
 ```
@@ -22,6 +22,6 @@ AWS Bedrock inference provider using OpenAI compatible endpoint.
 ## Sample Configuration

 ```yaml
-api_key: ${env.AWS_BEDROCK_API_KEY:=}
+api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
 region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
 ```
@@ -17,11 +17,11 @@ Cerebras inference provider for running models on Cerebras Cloud platform.
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `base_url` | `str` | No | https://api.cerebras.ai | Base URL for the Cerebras API |
+| `base_url` | `HttpUrl \| None` | No | https://api.cerebras.ai/v1 | Base URL for the Cerebras API |

 ## Sample Configuration

 ```yaml
-base_url: https://api.cerebras.ai
+base_url: https://api.cerebras.ai/v1
 api_key: ${env.CEREBRAS_API_KEY:=}
 ```
@@ -17,11 +17,11 @@ Databricks inference provider for running models on Databricks' unified analytic
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_token` | `SecretStr \| None` | No | | The Databricks API token |
-| `url` | `str \| None` | No | | The URL for the Databricks model serving endpoint |
+| `base_url` | `HttpUrl \| None` | No | | The URL for the Databricks model serving endpoint (should include /serving-endpoints path) |

 ## Sample Configuration

 ```yaml
-url: ${env.DATABRICKS_HOST:=}
+base_url: ${env.DATABRICKS_HOST:=}
 api_token: ${env.DATABRICKS_TOKEN:=}
 ```
@@ -17,11 +17,11 @@ Fireworks AI inference provider for Llama models and other AI models on the Fire
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `url` | `str` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks server |
+| `base_url` | `HttpUrl \| None` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks server |

 ## Sample Configuration

 ```yaml
-url: https://api.fireworks.ai/inference/v1
+base_url: https://api.fireworks.ai/inference/v1
 api_key: ${env.FIREWORKS_API_KEY:=}
 ```
@@ -17,11 +17,11 @@ Groq inference provider for ultra-fast inference using Groq's LPU technology.
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `url` | `str` | No | https://api.groq.com | The URL for the Groq AI server |
+| `base_url` | `HttpUrl \| None` | No | https://api.groq.com/openai/v1 | The URL for the Groq AI server |

 ## Sample Configuration

 ```yaml
-url: https://api.groq.com
+base_url: https://api.groq.com/openai/v1
 api_key: ${env.GROQ_API_KEY:=}
 ```
@@ -17,11 +17,11 @@ Llama OpenAI-compatible provider for using Llama models with OpenAI API format.
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `openai_compat_api_base` | `str` | No | https://api.llama.com/compat/v1/ | The URL for the Llama API server |
+| `base_url` | `HttpUrl \| None` | No | https://api.llama.com/compat/v1/ | The URL for the Llama API server |

 ## Sample Configuration

 ```yaml
-openai_compat_api_base: https://api.llama.com/compat/v1/
+base_url: https://api.llama.com/compat/v1/
 api_key: ${env.LLAMA_API_KEY}
 ```
@@ -17,15 +17,13 @@ NVIDIA inference provider for accessing NVIDIA NIM models and AI services.
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `url` | `str` | No | https://integrate.api.nvidia.com | A base url for accessing the NVIDIA NIM |
+| `base_url` | `HttpUrl \| None` | No | https://integrate.api.nvidia.com/v1 | A base url for accessing the NVIDIA NIM |
 | `timeout` | `int` | No | 60 | Timeout for the HTTP requests |
-| `append_api_version` | `bool` | No | True | When set to false, the API version will not be appended to the base_url. By default, it is true. |
 | `rerank_model_to_url` | `dict[str, str]` | No | `{'nv-rerank-qa-mistral-4b:1': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking', 'nvidia/nv-rerankqa-mistral-4b-v3': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking', 'nvidia/llama-3.2-nv-rerankqa-1b-v2': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking'}` | Mapping of rerank model identifiers to their API endpoints. |

 ## Sample Configuration

 ```yaml
-url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
 api_key: ${env.NVIDIA_API_KEY:=}
-append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
 ```
@@ -16,10 +16,10 @@ Ollama inference provider for running local models through the Ollama runtime.
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
-| `url` | `str` | No | http://localhost:11434 | |
+| `base_url` | `HttpUrl \| None` | No | http://localhost:11434/v1 | |

 ## Sample Configuration

 ```yaml
-url: ${env.OLLAMA_URL:=http://localhost:11434}
+base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
 ```
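For distribution configs that embed these providers, the renamed field would be set along the following lines (a minimal sketch of a run.yaml inference provider entry; the provider_id is illustrative and the exact surrounding structure is assumed from the usual llama-stack run.yaml layout, not from this diff):

```yaml
providers:
  inference:
    - provider_id: ollama
      provider_type: remote::ollama
      config:
        # base_url replaces the old url field and now includes the /v1 path
        base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
```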
@@ -17,7 +17,7 @@ OpenAI inference provider for accessing GPT models and other OpenAI services.
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `base_url` | `str` | No | https://api.openai.com/v1 | Base URL for OpenAI API |
+| `base_url` | `HttpUrl \| None` | No | https://api.openai.com/v1 | Base URL for OpenAI API |

 ## Sample Configuration

@@ -17,11 +17,11 @@ Passthrough inference provider for connecting to any external inference service
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `url` | `str` | No | | The URL for the passthrough endpoint |
+| `base_url` | `HttpUrl \| None` | No | | The URL for the passthrough endpoint |

 ## Sample Configuration

 ```yaml
-url: ${env.PASSTHROUGH_URL}
+base_url: ${env.PASSTHROUGH_URL}
 api_key: ${env.PASSTHROUGH_API_KEY}
 ```
@@ -17,11 +17,11 @@ RunPod inference provider for running models on RunPod's cloud GPU platform.
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_token` | `SecretStr \| None` | No | | The API token |
-| `url` | `str \| None` | No | | The URL for the Runpod model serving endpoint |
+| `base_url` | `HttpUrl \| None` | No | | The URL for the Runpod model serving endpoint |

 ## Sample Configuration

 ```yaml
-url: ${env.RUNPOD_URL:=}
+base_url: ${env.RUNPOD_URL:=}
 api_token: ${env.RUNPOD_API_TOKEN}
 ```
@@ -17,11 +17,11 @@ SambaNova inference provider for running models on SambaNova's dataflow architec
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `url` | `str` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server |
+| `base_url` | `HttpUrl \| None` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server |

 ## Sample Configuration

 ```yaml
-url: https://api.sambanova.ai/v1
+base_url: https://api.sambanova.ai/v1
 api_key: ${env.SAMBANOVA_API_KEY:=}
 ```
@@ -16,10 +16,10 @@ Text Generation Inference (TGI) provider for HuggingFace model serving.
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
-| `url` | `str` | No | | The URL for the TGI serving endpoint |
+| `base_url` | `HttpUrl \| None` | No | | The URL for the TGI serving endpoint (should include /v1 path) |

 ## Sample Configuration

 ```yaml
-url: ${env.TGI_URL:=}
+base_url: ${env.TGI_URL:=}
 ```
@@ -17,11 +17,11 @@ Together AI inference provider for open-source models and collaborative AI devel
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `url` | `str` | No | https://api.together.xyz/v1 | The URL for the Together AI server |
+| `base_url` | `HttpUrl \| None` | No | https://api.together.xyz/v1 | The URL for the Together AI server |

 ## Sample Configuration

 ```yaml
-url: https://api.together.xyz/v1
+base_url: https://api.together.xyz/v1
 api_key: ${env.TOGETHER_API_KEY:=}
 ```
@@ -17,14 +17,14 @@ Remote vLLM inference provider for connecting to vLLM servers.
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_token` | `SecretStr \| None` | No | | The API token |
-| `url` | `str \| None` | No | | The URL for the vLLM model serving endpoint |
+| `base_url` | `HttpUrl \| None` | No | | The URL for the vLLM model serving endpoint |
 | `max_tokens` | `int` | No | 4096 | Maximum number of tokens to generate. |
 | `tls_verify` | `bool \| str` | No | True | Whether to verify TLS certificates. Can be a boolean or a path to a CA certificate file. |

 ## Sample Configuration

 ```yaml
-url: ${env.VLLM_URL:=}
+base_url: ${env.VLLM_URL:=}
 max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
 api_token: ${env.VLLM_API_TOKEN:=fake}
 tls_verify: ${env.VLLM_TLS_VERIFY:=true}
@@ -17,14 +17,14 @@ IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `url` | `str` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing the watsonx.ai |
+| `base_url` | `HttpUrl \| None` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing the watsonx.ai |
 | `project_id` | `str \| None` | No | | The watsonx.ai project ID |
 | `timeout` | `int` | No | 60 | Timeout for the HTTP requests |

 ## Sample Configuration

 ```yaml
-url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
+base_url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
 api_key: ${env.WATSONX_API_KEY:=}
 project_id: ${env.WATSONX_PROJECT_ID:=}
 ```
docs/package-lock.json (122 changes, generated)

@@ -10712,12 +10712,6 @@
       "integrity": "sha512-QMUezzXWII9EV5aTFXW1UBVUO77wYPpjqIF8/AviUCThNeSYZykpoTixUeaNNBwmCev0AMDWMAni+f8Hxb1IFw==",
       "license": "Unlicense"
     },
-    "node_modules/fs.realpath": {
-      "version": "1.0.0",
-      "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz",
-      "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==",
-      "license": "ISC"
-    },
     "node_modules/fsevents": {
       "version": "2.3.3",
       "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",

@@ -10821,21 +10815,20 @@
       "license": "ISC"
     },
     "node_modules/glob": {
-      "version": "7.2.3",
-      "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz",
-      "integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==",
-      "deprecated": "Glob versions prior to v9 are no longer supported",
+      "version": "10.5.0",
+      "resolved": "https://registry.npmjs.org/glob/-/glob-10.5.0.tgz",
+      "integrity": "sha512-DfXN8DfhJ7NH3Oe7cFmu3NCu1wKbkReJ8TorzSAFbSKrlNaQSKfIzqYqVY8zlbs2NLBbWpRiU52GX2PbaBVNkg==",
       "license": "ISC",
       "dependencies": {
-        "fs.realpath": "^1.0.0",
-        "inflight": "^1.0.4",
-        "inherits": "2",
-        "minimatch": "^3.1.1",
-        "once": "^1.3.0",
-        "path-is-absolute": "^1.0.0"
+        "foreground-child": "^3.1.0",
+        "jackspeak": "^3.1.2",
+        "minimatch": "^9.0.4",
+        "minipass": "^7.1.2",
+        "package-json-from-dist": "^1.0.0",
+        "path-scurry": "^1.11.1"
       },
-      "engines": {
-        "node": "*"
+      "bin": {
+        "glob": "dist/esm/bin.mjs"
       },
       "funding": {
         "url": "https://github.com/sponsors/isaacs"

@@ -10859,26 +10852,19 @@
       "integrity": "sha512-lkX1HJXwyMcprw/5YUZc2s7DrpAiHB21/V+E1rHUrVNokkvB6bqMzT0VfV6/86ZNabt1k14YOIaT7nDvOX3Iiw==",
       "license": "BSD-2-Clause"
     },
-    "node_modules/glob/node_modules/brace-expansion": {
-      "version": "1.1.12",
-      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
-      "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
-      "license": "MIT",
-      "dependencies": {
-        "balanced-match": "^1.0.0",
-        "concat-map": "0.0.1"
-      }
-    },
     "node_modules/glob/node_modules/minimatch": {
-      "version": "3.1.2",
-      "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
-      "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==",
+      "version": "9.0.5",
+      "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz",
+      "integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==",
       "license": "ISC",
       "dependencies": {
-        "brace-expansion": "^1.1.7"
+        "brace-expansion": "^2.0.1"
       },
       "engines": {
-        "node": "*"
+        "node": ">=16 || 14 >=14.17"
       },
       "funding": {
         "url": "https://github.com/sponsors/isaacs"
       }
     },
     "node_modules/global-dirs": {

@@ -11792,17 +11778,6 @@
         "node": ">=12"
       }
     },
-    "node_modules/inflight": {
-      "version": "1.0.6",
-      "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz",
-      "integrity": "sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==",
-      "deprecated": "This module is not supported, and leaks memory. Do not use it. Check out lru-cache if you want a good and tested way to coalesce async requests by a key value, which is much more comprehensive and powerful.",
-      "license": "ISC",
-      "dependencies": {
-        "once": "^1.3.0",
-        "wrappy": "1"
-      }
-    },
     "node_modules/inherits": {
       "version": "2.0.4",
      "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",

@@ -15570,15 +15545,6 @@
         "node": ">= 0.8"
       }
     },
-    "node_modules/once": {
-      "version": "1.4.0",
-      "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz",
-      "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==",
-      "license": "ISC",
-      "dependencies": {
-        "wrappy": "1"
-      }
-    },
     "node_modules/onetime": {
       "version": "5.1.2",
       "resolved": "https://registry.npmjs.org/onetime/-/onetime-5.1.2.tgz",

@@ -15955,15 +15921,6 @@
         "node": "^12.20.0 || ^14.13.1 || >=16.0.0"
       }
     },
-    "node_modules/path-is-absolute": {
-      "version": "1.0.1",
-      "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz",
-      "integrity": "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==",
-      "license": "MIT",
-      "engines": {
-        "node": ">=0.10.0"
-      }
-    },
     "node_modules/path-is-inside": {
       "version": "1.0.2",
       "resolved": "https://registry.npmjs.org/path-is-inside/-/path-is-inside-1.0.2.tgz",

@@ -20038,41 +19995,6 @@
         "node": ">= 6"
       }
     },
-    "node_modules/sucrase/node_modules/glob": {
-      "version": "10.4.5",
-      "resolved": "https://registry.npmjs.org/glob/-/glob-10.4.5.tgz",
-      "integrity": "sha512-7Bv8RF0k6xjo7d4A/PxYLbUCfb6c+Vpd2/mB2yRDlew7Jb5hEXiCD9ibfO7wpk8i4sevK6DFny9h7EYbM3/sHg==",
-      "license": "ISC",
-      "dependencies": {
-        "foreground-child": "^3.1.0",
-        "jackspeak": "^3.1.2",
-        "minimatch": "^9.0.4",
-        "minipass": "^7.1.2",
-        "package-json-from-dist": "^1.0.0",
-        "path-scurry": "^1.11.1"
-      },
-      "bin": {
-        "glob": "dist/esm/bin.mjs"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/isaacs"
-      }
-    },
-    "node_modules/sucrase/node_modules/minimatch": {
-      "version": "9.0.5",
-      "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz",
-      "integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==",
-      "license": "ISC",
-      "dependencies": {
-        "brace-expansion": "^2.0.1"
-      },
-      "engines": {
-        "node": ">=16 || 14 >=14.17"
-      },
-      "funding": {
-        "url": "https://github.com/sponsors/isaacs"
-      }
-    },
     "node_modules/supports-color": {
       "version": "7.2.0",
       "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz",

@@ -21620,12 -21542,6 @@
         "url": "https://github.com/chalk/strip-ansi?sponsor=1"
       }
     },
-    "node_modules/wrappy": {
-      "version": "1.0.2",
-      "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz",
-      "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==",
-      "license": "ISC"
-    },
     "node_modules/write-file-atomic": {
       "version": "3.0.3",
       "resolved": "https://registry.npmjs.org/write-file-atomic/-/write-file-atomic-3.0.3.tgz",
@@ -31,6 +31,9 @@
     "react-dom": "^19.0.0",
     "remark-code-import": "^1.2.0"
   },
+  "overrides": {
+    "glob": "^10.5.0"
+  },
   "browserslist": {
     "production": [
       ">0.5%",
docs/static/deprecated-llama-stack-spec.yaml (14 changes, vendored)

@@ -6705,9 +6705,21 @@ components:
           title: Object
           default: vector_store.file
         attributes:
-          additionalProperties: true
+          additionalProperties:
+            anyOf:
+              - type: string
+                maxLength: 512
+              - type: number
+              - type: boolean
+            title: string | number | boolean
+          propertyNames:
+            type: string
+            maxLength: 64
           type: object
+          maxProperties: 16
           title: Attributes
+          description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers.
+          x-oaiTypeLabel: map
         chunking_strategy:
           oneOf:
             - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
docs/static/experimental-llama-stack-spec.yaml (14 changes, vendored)

@@ -6061,9 +6061,21 @@ components:
           title: Object
           default: vector_store.file
         attributes:
-          additionalProperties: true
+          additionalProperties:
+            anyOf:
+              - type: string
+                maxLength: 512
+              - type: number
+              - type: boolean
+            title: string | number | boolean
+          propertyNames:
+            type: string
+            maxLength: 64
           type: object
+          maxProperties: 16
           title: Attributes
+          description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers.
+          x-oaiTypeLabel: map
         chunking_strategy:
           oneOf:
             - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
docs/static/llama-stack-spec.yaml (14 changes, vendored)

@@ -8883,9 +8883,21 @@ components:
           title: Object
           default: vector_store.file
         attributes:
-          additionalProperties: true
+          additionalProperties:
+            anyOf:
+              - type: string
+                maxLength: 512
+              - type: number
+              - type: boolean
+            title: string | number | boolean
+          propertyNames:
+            type: string
+            maxLength: 64
           type: object
+          maxProperties: 16
           title: Attributes
+          description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers.
+          x-oaiTypeLabel: map
         chunking_strategy:
           oneOf:
             - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
docs/static/stainless-llama-stack-spec.yaml (14 changes, vendored)

@@ -9862,9 +9862,21 @@ components:
           title: Object
           default: vector_store.file
         attributes:
-          additionalProperties: true
+          additionalProperties:
+            anyOf:
+              - type: string
+                maxLength: 512
+              - type: number
+              - type: boolean
+            title: string | number | boolean
+          propertyNames:
+            type: string
+            maxLength: 64
           type: object
+          maxProperties: 16
           title: Attributes
+          description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers.
+          x-oaiTypeLabel: map
         chunking_strategy:
           oneOf:
             - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
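As a quick illustration of the tightened attributes schema in these specs, a request body fragment like the following would validate (illustrative keys and values; at most 16 keys, keys up to 64 characters, string values up to 512 characters, plus numbers and booleans):

```yaml
attributes:
  author: "alice"      # string value, <= 512 characters
  page_count: 42       # number
  reviewed: true       # boolean
```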
@@ -287,9 +287,9 @@ start_container() {
     # On macOS/Windows, use host.docker.internal to reach host from container
     # On Linux with --network host, use localhost
     if [[ "$(uname)" == "Darwin" ]] || [[ "$(uname)" == *"MINGW"* ]]; then
-        OLLAMA_URL="${OLLAMA_URL:-http://host.docker.internal:11434}"
+        OLLAMA_URL="${OLLAMA_URL:-http://host.docker.internal:11434/v1}"
     else
-        OLLAMA_URL="${OLLAMA_URL:-http://localhost:11434}"
+        OLLAMA_URL="${OLLAMA_URL:-http://localhost:11434/v1}"
     fi
     DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OLLAMA_URL=$OLLAMA_URL"
@@ -16,16 +16,16 @@ import sys
 from tests.integration.suites import SETUP_DEFINITIONS, SUITE_DEFINITIONS


-def get_setup_env_vars(setup_name, suite_name=None):
+def get_setup_config(setup_name, suite_name=None):
     """
-    Get environment variables for a setup, with optional suite default fallback.
+    Get full configuration (env vars + defaults) for a setup.

     Args:
         setup_name: Name of the setup (e.g., 'ollama', 'gpt')
         suite_name: Optional suite name to get default setup if setup_name is None

     Returns:
-        Dictionary of environment variables
+        Dictionary with 'env' and 'defaults' keys
     """
     # If no setup specified, try to get default from suite
     if not setup_name and suite_name:

@@ -34,7 +34,7 @@ def get_setup_env_vars(setup_name, suite_name=None):
             setup_name = suite.default_setup

     if not setup_name:
-        return {}
+        return {"env": {}, "defaults": {}}

     setup = SETUP_DEFINITIONS.get(setup_name)
     if not setup:

@@ -44,27 +44,31 @@ def get_setup_env_vars(setup_name, suite_name=None):
         )
         sys.exit(1)

-    return setup.env
+    return {"env": setup.env, "defaults": setup.defaults}


 def main():
-    parser = argparse.ArgumentParser(description="Extract environment variables from a test setup")
+    parser = argparse.ArgumentParser(description="Extract environment variables and defaults from a test setup")
     parser.add_argument("--setup", help="Setup name (e.g., ollama, gpt)")
     parser.add_argument("--suite", help="Suite name to get default setup from if --setup not provided")
     parser.add_argument("--format", choices=["bash", "json"], default="bash", help="Output format (default: bash)")

     args = parser.parse_args()

-    env_vars = get_setup_env_vars(args.setup, args.suite)
+    config = get_setup_config(args.setup, args.suite)

     if args.format == "bash":
-        # Output as bash export statements
-        for key, value in env_vars.items():
+        # Output env vars as bash export statements
+        for key, value in config["env"].items():
             print(f"export {key}='{value}'")
+        # Output defaults as bash export statements with LLAMA_STACK_TEST_ prefix
+        for key, value in config["defaults"].items():
+            env_key = f"LLAMA_STACK_TEST_{key.upper()}"
+            print(f"export {env_key}='{value}'")
     elif args.format == "json":
         import json

-        print(json.dumps(env_vars))
+        print(json.dumps(config))


 if __name__ == "__main__":
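The updated helper might be invoked roughly as follows (a minimal sketch; the script path is not visible in this capture, so the file name below is a placeholder, while the 'ollama' setup name comes from the script's own docstring):

```bash
# bash format: env vars, plus setup defaults exported with a LLAMA_STACK_TEST_ prefix
eval "$(python ./scripts/get_setup_env.py --setup ollama --format bash)"  # hypothetical path

# json format: prints {"env": {...}, "defaults": {...}}
python ./scripts/get_setup_env.py --setup ollama --format json  # hypothetical path
```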
@@ -640,7 +640,7 @@ cmd=( run -d "${PLATFORM_OPTS[@]}" --name llama-stack \
   --network llama-net \
   -p "${PORT}:${PORT}" \
   "${server_env_opts[@]}" \
-  -e OLLAMA_URL="http://ollama-server:${OLLAMA_PORT}" \
+  -e OLLAMA_URL="http://ollama-server:${OLLAMA_PORT}/v1" \
   "${SERVER_IMAGE}" --port "${PORT}")

 log "🦙 Starting Llama Stack..."
@@ -20,6 +20,7 @@ TEST_PATTERN=""
 INFERENCE_MODE="replay"
 EXTRA_PARAMS=""
 COLLECT_ONLY=false
+TYPESCRIPT_ONLY=false

 # Function to display usage
 usage() {

@@ -34,6 +35,7 @@ Options:
   --subdirs STRING          Comma-separated list of test subdirectories to run (overrides suite)
   --pattern STRING          Regex pattern to pass to pytest -k
   --collect-only            Collect tests only without running them (skips server startup)
+  --typescript-only         Skip Python tests and run only TypeScript client tests
   --help                    Show this help message

 Suites are defined in tests/integration/suites.py and define which tests to run.

@@ -90,6 +92,10 @@ while [[ $# -gt 0 ]]; do
             COLLECT_ONLY=true
             shift
             ;;
+        --typescript-only)
+            TYPESCRIPT_ONLY=true
+            shift
+            ;;
         --help)
             usage
             exit 0

@@ -181,6 +187,10 @@ echo "$SETUP_ENV"
 eval "$SETUP_ENV"
 echo ""

+# Export suite and setup names for TypeScript tests
+export LLAMA_STACK_TEST_SUITE="$TEST_SUITE"
+export LLAMA_STACK_TEST_SETUP="$TEST_SETUP"
+
 ROOT_DIR="$THIS_DIR/.."
 cd $ROOT_DIR

@@ -212,6 +222,71 @@ find_available_port() {
     return 1
 }

+run_client_ts_tests() {
+    if ! command -v npm &>/dev/null; then
+        echo "npm could not be found; ensure Node.js is installed"
+        return 1
+    fi
+
+    pushd tests/integration/client-typescript >/dev/null
+
+    # Determine if TS_CLIENT_PATH is a directory path or an npm version
+    if [[ -d "$TS_CLIENT_PATH" ]]; then
+        # It's a directory path - use local checkout
+        if [[ ! -f "$TS_CLIENT_PATH/package.json" ]]; then
+            echo "Error: $TS_CLIENT_PATH exists but doesn't look like llama-stack-client-typescript (no package.json)"
+            popd >/dev/null
+            return 1
+        fi
+        echo "Using local llama-stack-client-typescript from: $TS_CLIENT_PATH"
+
+        # Build the TypeScript client first
+        echo "Building TypeScript client..."
+        pushd "$TS_CLIENT_PATH" >/dev/null
+        npm install --silent
+        npm run build --silent
+        popd >/dev/null
+
+        # Install other dependencies first
+        if [[ "${CI:-}" == "true" || "${CI:-}" == "1" ]]; then
+            npm ci --silent
+        else
+            npm install --silent
+        fi
+
+        # Then install the client from local directory
+        echo "Installing llama-stack-client from: $TS_CLIENT_PATH"
+        npm install "$TS_CLIENT_PATH" --silent
+    else
+        # It's an npm version specifier - install from npm
+        echo "Installing llama-stack-client@${TS_CLIENT_PATH} from npm"
+        if [[ "${CI:-}" == "true" || "${CI:-}" == "1" ]]; then
+            npm ci --silent
+            npm install "llama-stack-client@${TS_CLIENT_PATH}" --silent
+        else
+            npm install "llama-stack-client@${TS_CLIENT_PATH}" --silent
+        fi
+    fi
+
+    # Verify installation
+    echo "Verifying llama-stack-client installation..."
+    if npm list llama-stack-client 2>/dev/null | grep -q llama-stack-client; then
+        echo "✅ llama-stack-client successfully installed"
+        npm list llama-stack-client
+    else
+        echo "❌ llama-stack-client not found in node_modules"
+        echo "Installed packages:"
+        npm list --depth=0
+        popd >/dev/null
+        return 1
+    fi
+
+    echo "Running TypeScript tests for suite $TEST_SUITE (setup $TEST_SETUP)"
+    npm test
+
+    popd >/dev/null
+}
+
 # Start Llama Stack Server if needed
 if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then
     # Find an available port for the server
@@ -221,6 +296,7 @@ if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then
exit 1
fi
export LLAMA_STACK_PORT
export TEST_API_BASE_URL="http://localhost:$LLAMA_STACK_PORT"
echo "Will use port: $LLAMA_STACK_PORT"
stop_server() {

@@ -298,6 +374,7 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
exit 1
fi
export LLAMA_STACK_PORT
export TEST_API_BASE_URL="http://localhost:$LLAMA_STACK_PORT"
echo "Will use port: $LLAMA_STACK_PORT"
echo "=== Building Docker Image for distribution: $DISTRO ==="
@@ -473,16 +550,23 @@ if [[ -n "$STACK_CONFIG" ]]; then
STACK_CONFIG_ARG="--stack-config=$STACK_CONFIG"
fi
pytest -s -v $PYTEST_TARGET \
$STACK_CONFIG_ARG \
--inference-mode="$INFERENCE_MODE" \
-k "$PYTEST_PATTERN" \
$EXTRA_PARAMS \
--color=yes \
--embedding-model=sentence-transformers/nomic-ai/nomic-embed-text-v1.5 \
--color=yes $EXTRA_PARAMS \
--capture=tee-sys
exit_code=$?
# Run Python tests unless typescript-only mode
if [[ "$TYPESCRIPT_ONLY" == "false" ]]; then
pytest -s -v $PYTEST_TARGET \
$STACK_CONFIG_ARG \
--inference-mode="$INFERENCE_MODE" \
-k "$PYTEST_PATTERN" \
$EXTRA_PARAMS \
--color=yes \
--embedding-model=sentence-transformers/nomic-ai/nomic-embed-text-v1.5 \
--color=yes $EXTRA_PARAMS \
--capture=tee-sys
exit_code=$?
else
echo "Skipping Python tests (--typescript-only mode)"
exit_code=0
fi
set +x
set -e
@@ -506,5 +590,10 @@ else
exit 1
fi
# Run TypeScript client tests if TS_CLIENT_PATH is set
if [[ $exit_code -eq 0 && -n "${TS_CLIENT_PATH:-}" && "${LLAMA_STACK_TEST_STACK_CONFIG_TYPE:-}" == "server" ]]; then
run_client_ts_tests
fi
echo ""
echo "=== Integration Tests Complete ==="
@ -17,44 +17,43 @@ providers:
|
|||
- provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
|
||||
provider_type: remote::cerebras
|
||||
config:
|
||||
base_url: https://api.cerebras.ai
|
||||
base_url: https://api.cerebras.ai/v1
|
||||
api_key: ${env.CEREBRAS_API_KEY:=}
|
||||
- provider_id: ${env.OLLAMA_URL:+ollama}
|
||||
provider_type: remote::ollama
|
||||
config:
|
||||
url: ${env.OLLAMA_URL:=http://localhost:11434}
|
||||
base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
|
||||
- provider_id: ${env.VLLM_URL:+vllm}
|
||||
provider_type: remote::vllm
|
||||
config:
|
||||
url: ${env.VLLM_URL:=}
|
||||
base_url: ${env.VLLM_URL:=}
|
||||
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
|
||||
api_token: ${env.VLLM_API_TOKEN:=fake}
|
||||
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
|
||||
- provider_id: ${env.TGI_URL:+tgi}
|
||||
provider_type: remote::tgi
|
||||
config:
|
||||
url: ${env.TGI_URL:=}
|
||||
base_url: ${env.TGI_URL:=}
|
||||
- provider_id: fireworks
|
||||
provider_type: remote::fireworks
|
||||
config:
|
||||
url: https://api.fireworks.ai/inference/v1
|
||||
base_url: https://api.fireworks.ai/inference/v1
|
||||
api_key: ${env.FIREWORKS_API_KEY:=}
|
||||
- provider_id: together
|
||||
provider_type: remote::together
|
||||
config:
|
||||
url: https://api.together.xyz/v1
|
||||
base_url: https://api.together.xyz/v1
|
||||
api_key: ${env.TOGETHER_API_KEY:=}
|
||||
- provider_id: bedrock
|
||||
provider_type: remote::bedrock
|
||||
config:
|
||||
api_key: ${env.AWS_BEDROCK_API_KEY:=}
|
||||
api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
|
||||
region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
|
||||
- provider_id: ${env.NVIDIA_API_KEY:+nvidia}
|
||||
provider_type: remote::nvidia
|
||||
config:
|
||||
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
|
||||
base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
|
||||
api_key: ${env.NVIDIA_API_KEY:=}
|
||||
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
|
||||
- provider_id: openai
|
||||
provider_type: remote::openai
|
||||
config:
|
||||
|
|
@ -76,18 +75,18 @@ providers:
|
|||
- provider_id: groq
|
||||
provider_type: remote::groq
|
||||
config:
|
||||
url: https://api.groq.com
|
||||
base_url: https://api.groq.com/openai/v1
|
||||
api_key: ${env.GROQ_API_KEY:=}
|
||||
- provider_id: sambanova
|
||||
provider_type: remote::sambanova
|
||||
config:
|
||||
url: https://api.sambanova.ai/v1
|
||||
base_url: https://api.sambanova.ai/v1
|
||||
api_key: ${env.SAMBANOVA_API_KEY:=}
|
||||
- provider_id: ${env.AZURE_API_KEY:+azure}
|
||||
provider_type: remote::azure
|
||||
config:
|
||||
api_key: ${env.AZURE_API_KEY:=}
|
||||
api_base: ${env.AZURE_API_BASE:=}
|
||||
base_url: ${env.AZURE_API_BASE:=}
|
||||
api_version: ${env.AZURE_API_VERSION:=}
|
||||
api_type: ${env.AZURE_API_TYPE:=}
|
||||
- provider_id: sentence-transformers
|
||||
|
|
|
|||
|
|
@ -17,44 +17,43 @@ providers:
|
|||
- provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
|
||||
provider_type: remote::cerebras
|
||||
config:
|
||||
base_url: https://api.cerebras.ai
|
||||
base_url: https://api.cerebras.ai/v1
|
||||
api_key: ${env.CEREBRAS_API_KEY:=}
|
||||
- provider_id: ${env.OLLAMA_URL:+ollama}
|
||||
provider_type: remote::ollama
|
||||
config:
|
||||
url: ${env.OLLAMA_URL:=http://localhost:11434}
|
||||
base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
|
||||
- provider_id: ${env.VLLM_URL:+vllm}
|
||||
provider_type: remote::vllm
|
||||
config:
|
||||
url: ${env.VLLM_URL:=}
|
||||
base_url: ${env.VLLM_URL:=}
|
||||
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
|
||||
api_token: ${env.VLLM_API_TOKEN:=fake}
|
||||
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
|
||||
- provider_id: ${env.TGI_URL:+tgi}
|
||||
provider_type: remote::tgi
|
||||
config:
|
||||
url: ${env.TGI_URL:=}
|
||||
base_url: ${env.TGI_URL:=}
|
||||
- provider_id: fireworks
|
||||
provider_type: remote::fireworks
|
||||
config:
|
||||
url: https://api.fireworks.ai/inference/v1
|
||||
base_url: https://api.fireworks.ai/inference/v1
|
||||
api_key: ${env.FIREWORKS_API_KEY:=}
|
||||
- provider_id: together
|
||||
provider_type: remote::together
|
||||
config:
|
||||
url: https://api.together.xyz/v1
|
||||
base_url: https://api.together.xyz/v1
|
||||
api_key: ${env.TOGETHER_API_KEY:=}
|
||||
- provider_id: bedrock
|
||||
provider_type: remote::bedrock
|
||||
config:
|
||||
api_key: ${env.AWS_BEDROCK_API_KEY:=}
|
||||
api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
|
||||
region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
|
||||
- provider_id: ${env.NVIDIA_API_KEY:+nvidia}
|
||||
provider_type: remote::nvidia
|
||||
config:
|
||||
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
|
||||
base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
|
||||
api_key: ${env.NVIDIA_API_KEY:=}
|
||||
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
|
||||
- provider_id: openai
|
||||
provider_type: remote::openai
|
||||
config:
|
||||
|
|
@ -76,18 +75,18 @@ providers:
|
|||
- provider_id: groq
|
||||
provider_type: remote::groq
|
||||
config:
|
||||
url: https://api.groq.com
|
||||
base_url: https://api.groq.com/openai/v1
|
||||
api_key: ${env.GROQ_API_KEY:=}
|
||||
- provider_id: sambanova
|
||||
provider_type: remote::sambanova
|
||||
config:
|
||||
url: https://api.sambanova.ai/v1
|
||||
base_url: https://api.sambanova.ai/v1
|
||||
api_key: ${env.SAMBANOVA_API_KEY:=}
|
||||
- provider_id: ${env.AZURE_API_KEY:+azure}
|
||||
provider_type: remote::azure
|
||||
config:
|
||||
api_key: ${env.AZURE_API_KEY:=}
|
||||
api_base: ${env.AZURE_API_BASE:=}
|
||||
base_url: ${env.AZURE_API_BASE:=}
|
||||
api_version: ${env.AZURE_API_VERSION:=}
|
||||
api_type: ${env.AZURE_API_TYPE:=}
|
||||
- provider_id: sentence-transformers
|
||||
|
|
|
|||
|
|
@ -16,9 +16,8 @@ providers:
|
|||
- provider_id: nvidia
|
||||
provider_type: remote::nvidia
|
||||
config:
|
||||
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
|
||||
base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
|
||||
api_key: ${env.NVIDIA_API_KEY:=}
|
||||
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
|
||||
- provider_id: nvidia
|
||||
provider_type: remote::nvidia
|
||||
config:
|
||||
|
|
|
|||
|
|
@ -16,9 +16,8 @@ providers:
|
|||
- provider_id: nvidia
|
||||
provider_type: remote::nvidia
|
||||
config:
|
||||
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
|
||||
base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
|
||||
api_key: ${env.NVIDIA_API_KEY:=}
|
||||
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
|
||||
vector_io:
|
||||
- provider_id: faiss
|
||||
provider_type: inline::faiss
|
||||
|
|
|
|||
|
|
@ -27,12 +27,12 @@ providers:
|
|||
- provider_id: groq
|
||||
provider_type: remote::groq
|
||||
config:
|
||||
url: https://api.groq.com
|
||||
base_url: https://api.groq.com/openai/v1
|
||||
api_key: ${env.GROQ_API_KEY:=}
|
||||
- provider_id: together
|
||||
provider_type: remote::together
|
||||
config:
|
||||
url: https://api.together.xyz/v1
|
||||
base_url: https://api.together.xyz/v1
|
||||
api_key: ${env.TOGETHER_API_KEY:=}
|
||||
vector_io:
|
||||
- provider_id: sqlite-vec
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ providers:
|
|||
- provider_id: vllm-inference
|
||||
provider_type: remote::vllm
|
||||
config:
|
||||
url: ${env.VLLM_URL:=http://localhost:8000/v1}
|
||||
base_url: ${env.VLLM_URL:=}
|
||||
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
|
||||
api_token: ${env.VLLM_API_TOKEN:=fake}
|
||||
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
|
||||
|
|
|
|||
|
|
@ -17,44 +17,43 @@ providers:
|
|||
- provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
|
||||
provider_type: remote::cerebras
|
||||
config:
|
||||
base_url: https://api.cerebras.ai
|
||||
base_url: https://api.cerebras.ai/v1
|
||||
api_key: ${env.CEREBRAS_API_KEY:=}
|
||||
- provider_id: ${env.OLLAMA_URL:+ollama}
|
||||
provider_type: remote::ollama
|
||||
config:
|
||||
url: ${env.OLLAMA_URL:=http://localhost:11434}
|
||||
base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
|
||||
- provider_id: ${env.VLLM_URL:+vllm}
|
||||
provider_type: remote::vllm
|
||||
config:
|
||||
url: ${env.VLLM_URL:=}
|
||||
base_url: ${env.VLLM_URL:=}
|
||||
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
|
||||
api_token: ${env.VLLM_API_TOKEN:=fake}
|
||||
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
|
||||
- provider_id: ${env.TGI_URL:+tgi}
|
||||
provider_type: remote::tgi
|
||||
config:
|
||||
url: ${env.TGI_URL:=}
|
||||
base_url: ${env.TGI_URL:=}
|
||||
- provider_id: fireworks
|
||||
provider_type: remote::fireworks
|
||||
config:
|
||||
url: https://api.fireworks.ai/inference/v1
|
||||
base_url: https://api.fireworks.ai/inference/v1
|
||||
api_key: ${env.FIREWORKS_API_KEY:=}
|
||||
- provider_id: together
|
||||
provider_type: remote::together
|
||||
config:
|
||||
url: https://api.together.xyz/v1
|
||||
base_url: https://api.together.xyz/v1
|
||||
api_key: ${env.TOGETHER_API_KEY:=}
|
||||
- provider_id: bedrock
|
||||
provider_type: remote::bedrock
|
||||
config:
|
||||
api_key: ${env.AWS_BEDROCK_API_KEY:=}
|
||||
api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
|
||||
region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
|
||||
- provider_id: ${env.NVIDIA_API_KEY:+nvidia}
|
||||
provider_type: remote::nvidia
|
||||
config:
|
||||
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
|
||||
base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
|
||||
api_key: ${env.NVIDIA_API_KEY:=}
|
||||
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
|
||||
- provider_id: openai
|
||||
provider_type: remote::openai
|
||||
config:
|
||||
|
|
@ -76,18 +75,18 @@ providers:
|
|||
- provider_id: groq
|
||||
provider_type: remote::groq
|
||||
config:
|
||||
url: https://api.groq.com
|
||||
base_url: https://api.groq.com/openai/v1
|
||||
api_key: ${env.GROQ_API_KEY:=}
|
||||
- provider_id: sambanova
|
||||
provider_type: remote::sambanova
|
||||
config:
|
||||
url: https://api.sambanova.ai/v1
|
||||
base_url: https://api.sambanova.ai/v1
|
||||
api_key: ${env.SAMBANOVA_API_KEY:=}
|
||||
- provider_id: ${env.AZURE_API_KEY:+azure}
|
||||
provider_type: remote::azure
|
||||
config:
|
||||
api_key: ${env.AZURE_API_KEY:=}
|
||||
api_base: ${env.AZURE_API_BASE:=}
|
||||
base_url: ${env.AZURE_API_BASE:=}
|
||||
api_version: ${env.AZURE_API_VERSION:=}
|
||||
api_type: ${env.AZURE_API_TYPE:=}
|
||||
- provider_id: sentence-transformers
|
||||
|
|
|
|||
|
|
@ -17,44 +17,43 @@ providers:
|
|||
- provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
|
||||
provider_type: remote::cerebras
|
||||
config:
|
||||
base_url: https://api.cerebras.ai
|
||||
base_url: https://api.cerebras.ai/v1
|
||||
api_key: ${env.CEREBRAS_API_KEY:=}
|
||||
- provider_id: ${env.OLLAMA_URL:+ollama}
|
||||
provider_type: remote::ollama
|
||||
config:
|
||||
url: ${env.OLLAMA_URL:=http://localhost:11434}
|
||||
base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
|
||||
- provider_id: ${env.VLLM_URL:+vllm}
|
||||
provider_type: remote::vllm
|
||||
config:
|
||||
url: ${env.VLLM_URL:=}
|
||||
base_url: ${env.VLLM_URL:=}
|
||||
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
|
||||
api_token: ${env.VLLM_API_TOKEN:=fake}
|
||||
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
|
||||
- provider_id: ${env.TGI_URL:+tgi}
|
||||
provider_type: remote::tgi
|
||||
config:
|
||||
url: ${env.TGI_URL:=}
|
||||
base_url: ${env.TGI_URL:=}
|
||||
- provider_id: fireworks
|
||||
provider_type: remote::fireworks
|
||||
config:
|
||||
url: https://api.fireworks.ai/inference/v1
|
||||
base_url: https://api.fireworks.ai/inference/v1
|
||||
api_key: ${env.FIREWORKS_API_KEY:=}
|
||||
- provider_id: together
|
||||
provider_type: remote::together
|
||||
config:
|
||||
url: https://api.together.xyz/v1
|
||||
base_url: https://api.together.xyz/v1
|
||||
api_key: ${env.TOGETHER_API_KEY:=}
|
||||
- provider_id: bedrock
|
||||
provider_type: remote::bedrock
|
||||
config:
|
||||
api_key: ${env.AWS_BEDROCK_API_KEY:=}
|
||||
api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
|
||||
region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
|
||||
- provider_id: ${env.NVIDIA_API_KEY:+nvidia}
|
||||
provider_type: remote::nvidia
|
||||
config:
|
||||
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
|
||||
base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
|
||||
api_key: ${env.NVIDIA_API_KEY:=}
|
||||
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
|
||||
- provider_id: openai
|
||||
provider_type: remote::openai
|
||||
config:
|
||||
|
|
@ -76,18 +75,18 @@ providers:
|
|||
- provider_id: groq
|
||||
provider_type: remote::groq
|
||||
config:
|
||||
url: https://api.groq.com
|
||||
base_url: https://api.groq.com/openai/v1
|
||||
api_key: ${env.GROQ_API_KEY:=}
|
||||
- provider_id: sambanova
|
||||
provider_type: remote::sambanova
|
||||
config:
|
||||
url: https://api.sambanova.ai/v1
|
||||
base_url: https://api.sambanova.ai/v1
|
||||
api_key: ${env.SAMBANOVA_API_KEY:=}
|
||||
- provider_id: ${env.AZURE_API_KEY:+azure}
|
||||
provider_type: remote::azure
|
||||
config:
|
||||
api_key: ${env.AZURE_API_KEY:=}
|
||||
api_base: ${env.AZURE_API_BASE:=}
|
||||
base_url: ${env.AZURE_API_BASE:=}
|
||||
api_version: ${env.AZURE_API_VERSION:=}
|
||||
api_type: ${env.AZURE_API_TYPE:=}
|
||||
- provider_id: sentence-transformers
|
||||
|
|
|
|||
|
|
@ -17,44 +17,43 @@ providers:
|
|||
- provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
|
||||
provider_type: remote::cerebras
|
||||
config:
|
||||
base_url: https://api.cerebras.ai
|
||||
base_url: https://api.cerebras.ai/v1
|
||||
api_key: ${env.CEREBRAS_API_KEY:=}
|
||||
- provider_id: ${env.OLLAMA_URL:+ollama}
|
||||
provider_type: remote::ollama
|
||||
config:
|
||||
url: ${env.OLLAMA_URL:=http://localhost:11434}
|
||||
base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
|
||||
- provider_id: ${env.VLLM_URL:+vllm}
|
||||
provider_type: remote::vllm
|
||||
config:
|
||||
url: ${env.VLLM_URL:=}
|
||||
base_url: ${env.VLLM_URL:=}
|
||||
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
|
||||
api_token: ${env.VLLM_API_TOKEN:=fake}
|
||||
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
|
||||
- provider_id: ${env.TGI_URL:+tgi}
|
||||
provider_type: remote::tgi
|
||||
config:
|
||||
url: ${env.TGI_URL:=}
|
||||
base_url: ${env.TGI_URL:=}
|
||||
- provider_id: fireworks
|
||||
provider_type: remote::fireworks
|
||||
config:
|
||||
url: https://api.fireworks.ai/inference/v1
|
||||
base_url: https://api.fireworks.ai/inference/v1
|
||||
api_key: ${env.FIREWORKS_API_KEY:=}
|
||||
- provider_id: together
|
||||
provider_type: remote::together
|
||||
config:
|
||||
url: https://api.together.xyz/v1
|
||||
base_url: https://api.together.xyz/v1
|
||||
api_key: ${env.TOGETHER_API_KEY:=}
|
||||
- provider_id: bedrock
|
||||
provider_type: remote::bedrock
|
||||
config:
|
||||
api_key: ${env.AWS_BEDROCK_API_KEY:=}
|
||||
api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
|
||||
region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
|
||||
- provider_id: ${env.NVIDIA_API_KEY:+nvidia}
|
||||
provider_type: remote::nvidia
|
||||
config:
|
||||
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
|
||||
base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
|
||||
api_key: ${env.NVIDIA_API_KEY:=}
|
||||
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
|
||||
- provider_id: openai
|
||||
provider_type: remote::openai
|
||||
config:
|
||||
|
|
@ -76,18 +75,18 @@ providers:
|
|||
- provider_id: groq
|
||||
provider_type: remote::groq
|
||||
config:
|
||||
url: https://api.groq.com
|
||||
base_url: https://api.groq.com/openai/v1
|
||||
api_key: ${env.GROQ_API_KEY:=}
|
||||
- provider_id: sambanova
|
||||
provider_type: remote::sambanova
|
||||
config:
|
||||
url: https://api.sambanova.ai/v1
|
||||
base_url: https://api.sambanova.ai/v1
|
||||
api_key: ${env.SAMBANOVA_API_KEY:=}
|
||||
- provider_id: ${env.AZURE_API_KEY:+azure}
|
||||
provider_type: remote::azure
|
||||
config:
|
||||
api_key: ${env.AZURE_API_KEY:=}
|
||||
api_base: ${env.AZURE_API_BASE:=}
|
||||
base_url: ${env.AZURE_API_BASE:=}
|
||||
api_version: ${env.AZURE_API_VERSION:=}
|
||||
api_type: ${env.AZURE_API_TYPE:=}
|
||||
- provider_id: sentence-transformers
|
||||
|
|
|
|||
|
|
@ -17,44 +17,43 @@ providers:
|
|||
- provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
|
||||
provider_type: remote::cerebras
|
||||
config:
|
||||
base_url: https://api.cerebras.ai
|
||||
base_url: https://api.cerebras.ai/v1
|
||||
api_key: ${env.CEREBRAS_API_KEY:=}
|
||||
- provider_id: ${env.OLLAMA_URL:+ollama}
|
||||
provider_type: remote::ollama
|
||||
config:
|
||||
url: ${env.OLLAMA_URL:=http://localhost:11434}
|
||||
base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
|
||||
- provider_id: ${env.VLLM_URL:+vllm}
|
||||
provider_type: remote::vllm
|
||||
config:
|
||||
url: ${env.VLLM_URL:=}
|
||||
base_url: ${env.VLLM_URL:=}
|
||||
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
|
||||
api_token: ${env.VLLM_API_TOKEN:=fake}
|
||||
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
|
||||
- provider_id: ${env.TGI_URL:+tgi}
|
||||
provider_type: remote::tgi
|
||||
config:
|
||||
url: ${env.TGI_URL:=}
|
||||
base_url: ${env.TGI_URL:=}
|
||||
- provider_id: fireworks
|
||||
provider_type: remote::fireworks
|
||||
config:
|
||||
url: https://api.fireworks.ai/inference/v1
|
||||
base_url: https://api.fireworks.ai/inference/v1
|
||||
api_key: ${env.FIREWORKS_API_KEY:=}
|
||||
- provider_id: together
|
||||
provider_type: remote::together
|
||||
config:
|
||||
url: https://api.together.xyz/v1
|
||||
base_url: https://api.together.xyz/v1
|
||||
api_key: ${env.TOGETHER_API_KEY:=}
|
||||
- provider_id: bedrock
|
||||
provider_type: remote::bedrock
|
||||
config:
|
||||
api_key: ${env.AWS_BEDROCK_API_KEY:=}
|
||||
api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
|
||||
region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
|
||||
- provider_id: ${env.NVIDIA_API_KEY:+nvidia}
|
||||
provider_type: remote::nvidia
|
||||
config:
|
||||
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
|
||||
base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
|
||||
api_key: ${env.NVIDIA_API_KEY:=}
|
||||
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
|
||||
- provider_id: openai
|
||||
provider_type: remote::openai
|
||||
config:
|
||||
|
|
@ -76,18 +75,18 @@ providers:
|
|||
- provider_id: groq
|
||||
provider_type: remote::groq
|
||||
config:
|
||||
url: https://api.groq.com
|
||||
base_url: https://api.groq.com/openai/v1
|
||||
api_key: ${env.GROQ_API_KEY:=}
|
||||
- provider_id: sambanova
|
||||
provider_type: remote::sambanova
|
||||
config:
|
||||
url: https://api.sambanova.ai/v1
|
||||
base_url: https://api.sambanova.ai/v1
|
||||
api_key: ${env.SAMBANOVA_API_KEY:=}
|
||||
- provider_id: ${env.AZURE_API_KEY:+azure}
|
||||
provider_type: remote::azure
|
||||
config:
|
||||
api_key: ${env.AZURE_API_KEY:=}
|
||||
api_base: ${env.AZURE_API_BASE:=}
|
||||
base_url: ${env.AZURE_API_BASE:=}
|
||||
api_version: ${env.AZURE_API_VERSION:=}
|
||||
api_type: ${env.AZURE_API_TYPE:=}
|
||||
- provider_id: sentence-transformers
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ providers:
|
|||
- provider_id: watsonx
|
||||
provider_type: remote::watsonx
|
||||
config:
|
||||
url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
|
||||
base_url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
|
||||
api_key: ${env.WATSONX_API_KEY:=}
|
||||
project_id: ${env.WATSONX_PROJECT_ID:=}
|
||||
vector_io:
|
||||
|
|
|
|||
|
|
@ -23,12 +23,14 @@ async def get_provider_impl(
|
|||
config,
|
||||
deps[Api.inference],
|
||||
deps[Api.vector_io],
|
||||
deps[Api.safety],
|
||||
deps.get(Api.safety),
|
||||
deps[Api.tool_runtime],
|
||||
deps[Api.tool_groups],
|
||||
deps[Api.conversations],
|
||||
policy,
|
||||
deps[Api.prompts],
|
||||
deps[Api.files],
|
||||
telemetry_enabled,
|
||||
policy,
|
||||
)
|
||||
await impl.initialize()
|
||||
return impl
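The only functional change in this hunk is Safety becoming optional: deps.get(...) yields None when a distribution does not wire up the Safety API, instead of raising. Illustrative sketch (plain strings stand in for the Api enum and impls):

deps = {"inference": "<impl>", "vector_io": "<impl>"}   # hypothetical deps mapping with no Safety provider
safety_api = deps.get("safety")                          # -> None instead of a KeyError
assert safety_api is None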
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ from llama_stack.providers.utils.responses.responses_store import ResponsesStore
|
|||
from llama_stack_api import (
|
||||
Agents,
|
||||
Conversations,
|
||||
Files,
|
||||
Inference,
|
||||
ListOpenAIResponseInputItem,
|
||||
ListOpenAIResponseObject,
|
||||
|
|
@ -22,6 +23,7 @@ from llama_stack_api import (
|
|||
OpenAIResponsePrompt,
|
||||
OpenAIResponseText,
|
||||
Order,
|
||||
Prompts,
|
||||
ResponseGuardrail,
|
||||
Safety,
|
||||
ToolGroups,
|
||||
|
|
@ -41,10 +43,12 @@ class MetaReferenceAgentsImpl(Agents):
|
|||
config: MetaReferenceAgentsImplConfig,
|
||||
inference_api: Inference,
|
||||
vector_io_api: VectorIO,
|
||||
safety_api: Safety,
|
||||
safety_api: Safety | None,
|
||||
tool_runtime_api: ToolRuntime,
|
||||
tool_groups_api: ToolGroups,
|
||||
conversations_api: Conversations,
|
||||
prompts_api: Prompts,
|
||||
files_api: Files,
|
||||
policy: list[AccessRule],
|
||||
telemetry_enabled: bool = False,
|
||||
):
|
||||
|
|
@ -56,7 +60,8 @@ class MetaReferenceAgentsImpl(Agents):
|
|||
self.tool_groups_api = tool_groups_api
|
||||
self.conversations_api = conversations_api
|
||||
self.telemetry_enabled = telemetry_enabled
|
||||
|
||||
self.prompts_api = prompts_api
|
||||
self.files_api = files_api
|
||||
self.in_memory_store = InmemoryKVStoreImpl()
|
||||
self.openai_responses_impl: OpenAIResponsesImpl | None = None
|
||||
self.policy = policy
|
||||
|
|
@ -73,6 +78,8 @@ class MetaReferenceAgentsImpl(Agents):
|
|||
vector_io_api=self.vector_io_api,
|
||||
safety_api=self.safety_api,
|
||||
conversations_api=self.conversations_api,
|
||||
prompts_api=self.prompts_api,
|
||||
files_api=self.files_api,
|
||||
)
|
||||
|
||||
async def shutdown(self) -> None:
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
import re
|
||||
import time
|
||||
import uuid
|
||||
from collections.abc import AsyncIterator
|
||||
|
|
@ -18,13 +19,17 @@ from llama_stack.providers.utils.responses.responses_store import (
|
|||
from llama_stack_api import (
|
||||
ConversationItem,
|
||||
Conversations,
|
||||
Files,
|
||||
Inference,
|
||||
InvalidConversationIdError,
|
||||
ListOpenAIResponseInputItem,
|
||||
ListOpenAIResponseObject,
|
||||
OpenAIChatCompletionContentPartParam,
|
||||
OpenAIDeleteResponseObject,
|
||||
OpenAIMessageParam,
|
||||
OpenAIResponseInput,
|
||||
OpenAIResponseInputMessageContentFile,
|
||||
OpenAIResponseInputMessageContentImage,
|
||||
OpenAIResponseInputMessageContentText,
|
||||
OpenAIResponseInputTool,
|
||||
OpenAIResponseMessage,
|
||||
|
|
@ -34,7 +39,9 @@ from llama_stack_api import (
|
|||
OpenAIResponseText,
|
||||
OpenAIResponseTextFormat,
|
||||
OpenAISystemMessageParam,
|
||||
OpenAIUserMessageParam,
|
||||
Order,
|
||||
Prompts,
|
||||
ResponseGuardrailSpec,
|
||||
Safety,
|
||||
ToolGroups,
|
||||
|
|
@ -46,6 +53,7 @@ from .streaming import StreamingResponseOrchestrator
|
|||
from .tool_executor import ToolExecutor
|
||||
from .types import ChatCompletionContext, ToolContext
|
||||
from .utils import (
|
||||
convert_response_content_to_chat_content,
|
||||
convert_response_input_to_chat_messages,
|
||||
convert_response_text_to_chat_response_format,
|
||||
extract_guardrail_ids,
|
||||
|
|
@ -67,8 +75,10 @@ class OpenAIResponsesImpl:
|
|||
tool_runtime_api: ToolRuntime,
|
||||
responses_store: ResponsesStore,
|
||||
vector_io_api: VectorIO, # VectorIO
|
||||
safety_api: Safety,
|
||||
safety_api: Safety | None,
|
||||
conversations_api: Conversations,
|
||||
prompts_api: Prompts,
|
||||
files_api: Files,
|
||||
):
|
||||
self.inference_api = inference_api
|
||||
self.tool_groups_api = tool_groups_api
|
||||
|
|
@ -82,6 +92,8 @@ class OpenAIResponsesImpl:
|
|||
tool_runtime_api=tool_runtime_api,
|
||||
vector_io_api=vector_io_api,
|
||||
)
|
||||
self.prompts_api = prompts_api
|
||||
self.files_api = files_api
|
||||
|
||||
async def _prepend_previous_response(
|
||||
self,
|
||||
|
|
@ -122,11 +134,13 @@ class OpenAIResponsesImpl:
|
|||
# Use stored messages directly and convert only new input
|
||||
message_adapter = TypeAdapter(list[OpenAIMessageParam])
|
||||
messages = message_adapter.validate_python(previous_response.messages)
|
||||
new_messages = await convert_response_input_to_chat_messages(input, previous_messages=messages)
|
||||
new_messages = await convert_response_input_to_chat_messages(
|
||||
input, previous_messages=messages, files_api=self.files_api
|
||||
)
|
||||
messages.extend(new_messages)
|
||||
else:
|
||||
# Backward compatibility: reconstruct from inputs
|
||||
messages = await convert_response_input_to_chat_messages(all_input)
|
||||
messages = await convert_response_input_to_chat_messages(all_input, files_api=self.files_api)
|
||||
|
||||
tool_context.recover_tools_from_previous_response(previous_response)
|
||||
elif conversation is not None:
|
||||
|
|
@ -138,7 +152,7 @@ class OpenAIResponsesImpl:
|
|||
all_input = input
|
||||
if not conversation_items.data:
|
||||
# First turn - just convert the new input
|
||||
messages = await convert_response_input_to_chat_messages(input)
|
||||
messages = await convert_response_input_to_chat_messages(input, files_api=self.files_api)
|
||||
else:
|
||||
if not stored_messages:
|
||||
all_input = conversation_items.data
|
||||
|
|
@ -154,14 +168,82 @@ class OpenAIResponsesImpl:
|
|||
all_input = input
|
||||
|
||||
messages = stored_messages or []
|
||||
new_messages = await convert_response_input_to_chat_messages(all_input, previous_messages=messages)
|
||||
new_messages = await convert_response_input_to_chat_messages(
|
||||
all_input, previous_messages=messages, files_api=self.files_api
|
||||
)
|
||||
messages.extend(new_messages)
|
||||
else:
|
||||
all_input = input
|
||||
messages = await convert_response_input_to_chat_messages(all_input)
|
||||
messages = await convert_response_input_to_chat_messages(all_input, files_api=self.files_api)
|
||||
|
||||
return all_input, messages, tool_context
|
||||
|
||||
async def _prepend_prompt(
|
||||
self,
|
||||
messages: list[OpenAIMessageParam],
|
||||
openai_response_prompt: OpenAIResponsePrompt | None,
|
||||
) -> None:
|
||||
"""Prepend prompt template to messages, resolving text/image/file variables.
|
||||
|
||||
:param messages: List of OpenAIMessageParam objects
|
||||
:param openai_response_prompt: (Optional) OpenAIResponsePrompt object with variables
|
||||
:returns: None; the resolved prompt is prepended to messages in place
|
||||
"""
|
||||
if not openai_response_prompt or not openai_response_prompt.id:
|
||||
return
|
||||
|
||||
prompt_version = int(openai_response_prompt.version) if openai_response_prompt.version else None
|
||||
cur_prompt = await self.prompts_api.get_prompt(openai_response_prompt.id, prompt_version)
|
||||
|
||||
if not cur_prompt or not cur_prompt.prompt:
|
||||
return
|
||||
|
||||
cur_prompt_text = cur_prompt.prompt
|
||||
cur_prompt_variables = cur_prompt.variables
|
||||
|
||||
if not openai_response_prompt.variables:
|
||||
messages.insert(0, OpenAISystemMessageParam(content=cur_prompt_text))
|
||||
return
|
||||
|
||||
# Validate that all provided variables exist in the prompt
|
||||
for name in openai_response_prompt.variables.keys():
|
||||
if name not in cur_prompt_variables:
|
||||
raise ValueError(f"Variable {name} not found in prompt {openai_response_prompt.id}")
|
||||
|
||||
# Separate text and media variables
|
||||
text_substitutions = {}
|
||||
media_content_parts: list[OpenAIChatCompletionContentPartParam] = []
|
||||
|
||||
for name, value in openai_response_prompt.variables.items():
|
||||
# Text variable found
|
||||
if isinstance(value, OpenAIResponseInputMessageContentText):
|
||||
text_substitutions[name] = value.text
|
||||
|
||||
# Media variable found
|
||||
elif isinstance(value, OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile):
|
||||
converted_parts = await convert_response_content_to_chat_content([value], files_api=self.files_api)
|
||||
if isinstance(converted_parts, list):
|
||||
media_content_parts.extend(converted_parts)
|
||||
|
||||
# Eg: {{product_photo}} becomes "[Image: product_photo]"
|
||||
# This gives the model textual context about what media exists in the prompt
|
||||
var_type = value.type.replace("input_", "").replace("_", " ").title()
|
||||
text_substitutions[name] = f"[{var_type}: {name}]"
|
||||
|
||||
def replace_variable(match: re.Match[str]) -> str:
|
||||
var_name = match.group(1).strip()
|
||||
return str(text_substitutions.get(var_name, match.group(0)))
|
||||
|
||||
pattern = r"\{\{\s*(\w+)\s*\}\}"
|
||||
processed_prompt_text = re.sub(pattern, replace_variable, cur_prompt_text)
|
||||
|
||||
# Insert system message with resolved text
|
||||
messages.insert(0, OpenAISystemMessageParam(content=processed_prompt_text))
|
||||
|
||||
# If we have media, create a new user message so the model can ingest the images and files
|
||||
if media_content_parts:
|
||||
messages.append(OpenAIUserMessageParam(content=media_content_parts))
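As a quick illustration of the substitution above (the regex and the placeholder format come from this method; the prompt text and variable names are made up):

import re

text_substitutions = {
    "audience": "new customers",                # text variable, substituted directly
    "product_photo": "[Image: product_photo]",  # media variable, replaced by a textual placeholder
}
pattern = r"\{\{\s*(\w+)\s*\}\}"
prompt_text = "Describe {{product_photo}} for {{ audience }}."
resolved = re.sub(pattern, lambda m: str(text_substitutions.get(m.group(1).strip(), m.group(0))), prompt_text)
# resolved == "Describe [Image: product_photo] for new customers."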
|
||||
|
||||
async def get_openai_response(
|
||||
self,
|
||||
response_id: str,
|
||||
|
|
@ -273,6 +355,14 @@ class OpenAIResponsesImpl:
|
|||
|
||||
guardrail_ids = extract_guardrail_ids(guardrails) if guardrails else []
|
||||
|
||||
# Validate that Safety API is available if guardrails are requested
|
||||
if guardrail_ids and self.safety_api is None:
|
||||
raise ValueError(
|
||||
"Cannot process guardrails: Safety API is not configured.\n\n"
|
||||
"To use guardrails, ensure the Safety API is configured in your stack, or remove "
|
||||
"the 'guardrails' parameter from your request."
|
||||
)
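A minimal standalone sketch of this gating (simplified names; the error text mirrors the message above):

def validate_guardrails(guardrail_ids, safety_api):
    # Guardrails require a configured Safety API; otherwise fail fast.
    if guardrail_ids and safety_api is None:
        raise ValueError("Cannot process guardrails: Safety API is not configured.")

validate_guardrails([], None)                 # OK: no guardrails requested
# validate_guardrails(["llama-guard"], None)  # would raise ValueError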
|
||||
|
||||
if conversation is not None:
|
||||
if previous_response_id is not None:
|
||||
raise ValueError(
|
||||
|
|
@ -289,6 +379,7 @@ class OpenAIResponsesImpl:
|
|||
input=input,
|
||||
conversation=conversation,
|
||||
model=model,
|
||||
prompt=prompt,
|
||||
instructions=instructions,
|
||||
previous_response_id=previous_response_id,
|
||||
store=store,
|
||||
|
|
@ -342,6 +433,7 @@ class OpenAIResponsesImpl:
|
|||
instructions: str | None = None,
|
||||
previous_response_id: str | None = None,
|
||||
conversation: str | None = None,
|
||||
prompt: OpenAIResponsePrompt | None = None,
|
||||
store: bool | None = True,
|
||||
temperature: float | None = None,
|
||||
text: OpenAIResponseText | None = None,
|
||||
|
|
@ -364,6 +456,9 @@ class OpenAIResponsesImpl:
|
|||
if instructions:
|
||||
messages.insert(0, OpenAISystemMessageParam(content=instructions))
|
||||
|
||||
# Prepend reusable prompt (if provided)
|
||||
await self._prepend_prompt(messages, prompt)
|
||||
|
||||
# Structured outputs
|
||||
response_format = await convert_response_text_to_chat_response_format(text)
|
||||
|
||||
|
|
@ -386,6 +481,7 @@ class OpenAIResponsesImpl:
|
|||
ctx=ctx,
|
||||
response_id=response_id,
|
||||
created_at=created_at,
|
||||
prompt=prompt,
|
||||
text=text,
|
||||
max_infer_iters=max_infer_iters,
|
||||
parallel_tool_calls=parallel_tool_calls,
|
||||
|
|
|
|||
|
|
@ -66,6 +66,8 @@ from llama_stack_api import (
|
|||
OpenAIResponseUsage,
|
||||
OpenAIResponseUsageInputTokensDetails,
|
||||
OpenAIResponseUsageOutputTokensDetails,
|
||||
OpenAIToolMessageParam,
|
||||
Safety,
|
||||
WebSearchToolTypes,
|
||||
)
|
||||
|
||||
|
|
@ -111,7 +113,7 @@ class StreamingResponseOrchestrator:
|
|||
max_infer_iters: int,
|
||||
tool_executor, # Will be the tool execution logic from the main class
|
||||
instructions: str | None,
|
||||
safety_api,
|
||||
safety_api: Safety | None,
|
||||
guardrail_ids: list[str] | None = None,
|
||||
prompt: OpenAIResponsePrompt | None = None,
|
||||
parallel_tool_calls: bool | None = None,
|
||||
|
|
@ -905,10 +907,16 @@ class StreamingResponseOrchestrator:
|
|||
"""Coordinate execution of both function and non-function tool calls."""
|
||||
# Execute non-function tool calls
|
||||
for tool_call in non_function_tool_calls:
|
||||
# Check if total calls made to built-in and mcp tools exceed max_tool_calls
|
||||
# if total calls made to built-in and mcp tools exceed max_tool_calls
|
||||
# then create a tool response message indicating the call was skipped
|
||||
if self.max_tool_calls is not None and self.accumulated_builtin_tool_calls >= self.max_tool_calls:
|
||||
logger.info(f"Ignoring built-in and mcp tool call since reached the limit of {self.max_tool_calls=}.")
|
||||
break
|
||||
skipped_call_message = OpenAIToolMessageParam(
|
||||
content=f"Tool call skipped: maximum tool calls limit ({self.max_tool_calls}) reached.",
|
||||
tool_call_id=tool_call.id,
|
||||
)
|
||||
next_turn_messages.append(skipped_call_message)
|
||||
continue
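Behaviour sketch of the new skip path, assuming max_tool_calls=2 and three pending built-in calls (plain dicts stand in for OpenAIToolMessageParam):

max_tool_calls = 2
executed, next_turn_messages = 0, []
for call_id in ["call_1", "call_2", "call_3"]:
    if max_tool_calls is not None and executed >= max_tool_calls:
        # The extra call is not executed; a "skipped" tool message is recorded instead.
        next_turn_messages.append({
            "role": "tool",
            "tool_call_id": call_id,
            "content": f"Tool call skipped: maximum tool calls limit ({max_tool_calls}) reached.",
        })
        continue
    executed += 1  # the real code would run the tool here
# executed == 2; next_turn_messages holds one skipped-call message for call_3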
|
||||
|
||||
# Find the item_id for this tool call
|
||||
matching_item_id = None
|
||||
|
|
|
|||
|
|
@ -5,11 +5,14 @@
|
|||
# the root directory of this source tree.
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import mimetypes
|
||||
import re
|
||||
import uuid
|
||||
from collections.abc import Sequence
|
||||
|
||||
from llama_stack_api import (
|
||||
Files,
|
||||
OpenAIAssistantMessageParam,
|
||||
OpenAIChatCompletionContentPartImageParam,
|
||||
OpenAIChatCompletionContentPartParam,
|
||||
|
|
@ -18,6 +21,8 @@ from llama_stack_api import (
|
|||
OpenAIChatCompletionToolCallFunction,
|
||||
OpenAIChoice,
|
||||
OpenAIDeveloperMessageParam,
|
||||
OpenAIFile,
|
||||
OpenAIFileFile,
|
||||
OpenAIImageURL,
|
||||
OpenAIJSONSchema,
|
||||
OpenAIMessageParam,
|
||||
|
|
@ -29,6 +34,7 @@ from llama_stack_api import (
|
|||
OpenAIResponseInput,
|
||||
OpenAIResponseInputFunctionToolCallOutput,
|
||||
OpenAIResponseInputMessageContent,
|
||||
OpenAIResponseInputMessageContentFile,
|
||||
OpenAIResponseInputMessageContentImage,
|
||||
OpenAIResponseInputMessageContentText,
|
||||
OpenAIResponseInputTool,
|
||||
|
|
@ -37,9 +43,11 @@ from llama_stack_api import (
|
|||
OpenAIResponseMessage,
|
||||
OpenAIResponseOutputMessageContent,
|
||||
OpenAIResponseOutputMessageContentOutputText,
|
||||
OpenAIResponseOutputMessageFileSearchToolCall,
|
||||
OpenAIResponseOutputMessageFunctionToolCall,
|
||||
OpenAIResponseOutputMessageMCPCall,
|
||||
OpenAIResponseOutputMessageMCPListTools,
|
||||
OpenAIResponseOutputMessageWebSearchToolCall,
|
||||
OpenAIResponseText,
|
||||
OpenAISystemMessageParam,
|
||||
OpenAIToolMessageParam,
|
||||
|
|
@ -49,6 +57,46 @@ from llama_stack_api import (
|
|||
)
|
||||
|
||||
|
||||
async def extract_bytes_from_file(file_id: str, files_api: Files) -> bytes:
|
||||
"""
|
||||
Extract raw bytes from file using the Files API.
|
||||
|
||||
:param file_id: The file identifier (e.g., "file-abc123")
|
||||
:param files_api: Files API instance
|
||||
:returns: Raw file content as bytes
|
||||
:raises: ValueError if file cannot be retrieved
|
||||
"""
|
||||
try:
|
||||
response = await files_api.openai_retrieve_file_content(file_id)
|
||||
return bytes(response.body)
|
||||
except Exception as e:
|
||||
raise ValueError(f"Failed to retrieve file content for file_id '{file_id}': {str(e)}") from e
|
||||
|
||||
|
||||
def generate_base64_ascii_text_from_bytes(raw_bytes: bytes) -> str:
|
||||
"""
|
||||
Converts raw binary bytes into a safe ASCII text representation for URLs
|
||||
|
||||
:param raw_bytes: the actual bytes that represents file content
|
||||
:returns: string of utf-8 characters
|
||||
"""
|
||||
return base64.b64encode(raw_bytes).decode("utf-8")
|
||||
|
||||
|
||||
def construct_data_url(ascii_text: str, mime_type: str | None) -> str:
|
||||
"""
|
||||
Construct data url with decoded data inside
|
||||
|
||||
:param ascii_text: ASCII content
|
||||
:param mime_type: MIME type of file
|
||||
:returns: data URL string (e.g. data:text/plain;base64,aGVsbG8=)
|
||||
"""
|
||||
if not mime_type:
|
||||
mime_type = "application/octet-stream"
|
||||
|
||||
return f"data:{mime_type};base64,{ascii_text}"
|
||||
|
||||
|
||||
async def convert_chat_choice_to_response_message(
|
||||
choice: OpenAIChoice,
|
||||
citation_files: dict[str, str] | None = None,
|
||||
|
|
@ -78,11 +126,15 @@ async def convert_chat_choice_to_response_message(
|
|||
|
||||
async def convert_response_content_to_chat_content(
|
||||
content: str | Sequence[OpenAIResponseInputMessageContent | OpenAIResponseOutputMessageContent],
|
||||
files_api: Files | None,
|
||||
) -> str | list[OpenAIChatCompletionContentPartParam]:
|
||||
"""
|
||||
Convert the content parts from an OpenAI Response API request into OpenAI Chat Completion content parts.
|
||||
|
||||
The content schemas of each API look similar, but are not exactly the same.
|
||||
|
||||
:param content: The content to convert
|
||||
:param files_api: Files API for resolving file_id to raw file content (required if content contains files/images)
|
||||
"""
|
||||
if isinstance(content, str):
|
||||
return content
|
||||
|
|
@ -95,9 +147,68 @@ async def convert_response_content_to_chat_content(
|
|||
elif isinstance(content_part, OpenAIResponseOutputMessageContentOutputText):
|
||||
converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part.text))
|
||||
elif isinstance(content_part, OpenAIResponseInputMessageContentImage):
|
||||
detail = content_part.detail
|
||||
image_mime_type = None
|
||||
if content_part.image_url:
|
||||
image_url = OpenAIImageURL(url=content_part.image_url, detail=content_part.detail)
|
||||
image_url = OpenAIImageURL(url=content_part.image_url, detail=detail)
|
||||
converted_parts.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url))
|
||||
elif content_part.file_id:
|
||||
if files_api is None:
|
||||
raise ValueError("file_ids are not supported by this implementation of the Stack")
|
||||
image_file_response = await files_api.openai_retrieve_file(content_part.file_id)
|
||||
if image_file_response.filename:
|
||||
image_mime_type, _ = mimetypes.guess_type(image_file_response.filename)
|
||||
raw_image_bytes = await extract_bytes_from_file(content_part.file_id, files_api)
|
||||
ascii_text = generate_base64_ascii_text_from_bytes(raw_image_bytes)
|
||||
image_data_url = construct_data_url(ascii_text, image_mime_type)
|
||||
image_url = OpenAIImageURL(url=image_data_url, detail=detail)
|
||||
converted_parts.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url))
|
||||
else:
|
||||
raise ValueError(
|
||||
f"Image content must have either 'image_url' or 'file_id'. "
|
||||
f"Got image_url={content_part.image_url}, file_id={content_part.file_id}"
|
||||
)
|
||||
elif isinstance(content_part, OpenAIResponseInputMessageContentFile):
|
||||
resolved_file_data = None
|
||||
file_data = content_part.file_data
|
||||
file_id = content_part.file_id
|
||||
file_url = content_part.file_url
|
||||
filename = content_part.filename
|
||||
file_mime_type = None
|
||||
if not any([file_data, file_id, file_url]):
|
||||
raise ValueError(
|
||||
f"File content must have at least one of 'file_data', 'file_id', or 'file_url'. "
|
||||
f"Got file_data={file_data}, file_id={file_id}, file_url={file_url}"
|
||||
)
|
||||
if file_id:
|
||||
if files_api is None:
|
||||
raise ValueError("file_ids are not supported by this implementation of the Stack")
|
||||
|
||||
file_response = await files_api.openai_retrieve_file(file_id)
|
||||
if not filename:
|
||||
filename = file_response.filename
|
||||
file_mime_type, _ = mimetypes.guess_type(file_response.filename)
|
||||
raw_file_bytes = await extract_bytes_from_file(file_id, files_api)
|
||||
ascii_text = generate_base64_ascii_text_from_bytes(raw_file_bytes)
|
||||
resolved_file_data = construct_data_url(ascii_text, file_mime_type)
|
||||
elif file_data:
|
||||
if file_data.startswith("data:"):
|
||||
resolved_file_data = file_data
|
||||
else:
|
||||
# Raw base64 data, wrap in data URL format
|
||||
if filename:
|
||||
file_mime_type, _ = mimetypes.guess_type(filename)
|
||||
resolved_file_data = construct_data_url(file_data, file_mime_type)
|
||||
elif file_url:
|
||||
resolved_file_data = file_url
|
||||
converted_parts.append(
|
||||
OpenAIFile(
|
||||
file=OpenAIFileFile(
|
||||
file_data=resolved_file_data,
|
||||
filename=filename,
|
||||
)
|
||||
)
|
||||
)
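To summarise the branch above, a compact sketch of the resolution order for file parts (hypothetical helper; in the real code file_id is resolved through the Files API and base64-encoded):

def resolve_file_part(file_id=None, file_data=None, file_url=None):
    if file_id:        # 1) fetch bytes via the Files API, base64-encode, wrap in a data: URL
        return "data:application/pdf;base64,<bytes fetched for " + file_id + ">"
    if file_data:      # 2) already a data: URL, or raw base64 that gets wrapped
        return file_data if file_data.startswith("data:") else f"data:application/octet-stream;base64,{file_data}"
    if file_url:       # 3) plain URL passed through unchanged
        return file_url
    raise ValueError("File content must have at least one of 'file_data', 'file_id', or 'file_url'.")

resolve_file_part(file_url="https://example.com/report.pdf")  # -> the URL itself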
|
||||
elif isinstance(content_part, str):
|
||||
converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part))
|
||||
else:
|
||||
|
|
@ -110,12 +221,14 @@ async def convert_response_content_to_chat_content(
|
|||
async def convert_response_input_to_chat_messages(
|
||||
input: str | list[OpenAIResponseInput],
|
||||
previous_messages: list[OpenAIMessageParam] | None = None,
|
||||
files_api: Files | None = None,
|
||||
) -> list[OpenAIMessageParam]:
|
||||
"""
|
||||
Convert the input from an OpenAI Response API request into OpenAI Chat Completion messages.
|
||||
|
||||
:param input: The input to convert
|
||||
:param previous_messages: Optional previous messages to check for function_call references
|
||||
:param files_api: Files API for resolving file_id to raw file content (optional, required for file/image content)
|
||||
"""
|
||||
messages: list[OpenAIMessageParam] = []
|
||||
if isinstance(input, list):
|
||||
|
|
@ -169,6 +282,12 @@ async def convert_response_input_to_chat_messages(
|
|||
elif isinstance(input_item, OpenAIResponseOutputMessageMCPListTools):
|
||||
# the tool list will be handled separately
|
||||
pass
|
||||
elif isinstance(
|
||||
input_item,
|
||||
OpenAIResponseOutputMessageWebSearchToolCall | OpenAIResponseOutputMessageFileSearchToolCall,
|
||||
):
|
||||
# these tool calls are tracked internally but not converted to chat messages
|
||||
pass
|
||||
elif isinstance(input_item, OpenAIResponseMCPApprovalRequest) or isinstance(
|
||||
input_item, OpenAIResponseMCPApprovalResponse
|
||||
):
|
||||
|
|
@ -176,7 +295,7 @@ async def convert_response_input_to_chat_messages(
|
|||
pass
|
||||
elif isinstance(input_item, OpenAIResponseMessage):
|
||||
# Narrow type to OpenAIResponseMessage which has content and role attributes
|
||||
content = await convert_response_content_to_chat_content(input_item.content)
|
||||
content = await convert_response_content_to_chat_content(input_item.content, files_api)
|
||||
message_type = await get_message_type_by_role(input_item.role)
|
||||
if message_type is None:
|
||||
raise ValueError(
|
||||
|
|
@ -320,11 +439,15 @@ def is_function_tool_call(
|
|||
return False
|
||||
|
||||
|
||||
async def run_guardrails(safety_api: Safety, messages: str, guardrail_ids: list[str]) -> str | None:
|
||||
async def run_guardrails(safety_api: Safety | None, messages: str, guardrail_ids: list[str]) -> str | None:
|
||||
"""Run guardrails against messages and return violation message if blocked."""
|
||||
if not messages:
|
||||
return None
|
||||
|
||||
# If safety API is not available, skip guardrails
|
||||
if safety_api is None:
|
||||
return None
|
||||
|
||||
# Look up shields to get their provider_resource_id (actual model ID)
|
||||
model_ids = []
|
||||
# TODO: list_shields not in Safety interface but available at runtime via API routing
|
||||
|
|
|
|||
|
|
@ -30,11 +30,15 @@ def available_providers() -> list[ProviderSpec]:
|
|||
config_class="llama_stack.providers.inline.agents.meta_reference.MetaReferenceAgentsImplConfig",
|
||||
api_dependencies=[
|
||||
Api.inference,
|
||||
Api.safety,
|
||||
Api.vector_io,
|
||||
Api.tool_runtime,
|
||||
Api.tool_groups,
|
||||
Api.conversations,
|
||||
Api.prompts,
|
||||
Api.files,
|
||||
],
|
||||
optional_api_dependencies=[
|
||||
Api.safety,
|
||||
],
|
||||
description="Meta's reference implementation of an agent system that can use tools, access vector databases, and perform complex reasoning tasks.",
|
||||
),
|
||||
|
|
|
|||
|
|
@ -4,8 +4,6 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from urllib.parse import urljoin
|
||||
|
||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||
|
||||
from .config import AzureConfig
|
||||
|
|
@ -22,4 +20,4 @@ class AzureInferenceAdapter(OpenAIMixin):
|
|||
|
||||
Returns the Azure API base URL from the configuration.
|
||||
"""
|
||||
return urljoin(str(self.config.api_base), "/openai/v1")
|
||||
return str(self.config.base_url)
|
||||
|
|
|
|||
|
|
@ -32,8 +32,9 @@ class AzureProviderDataValidator(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class AzureConfig(RemoteInferenceProviderConfig):
|
||||
api_base: HttpUrl = Field(
|
||||
description="Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com)",
|
||||
base_url: HttpUrl | None = Field(
|
||||
default=None,
|
||||
description="Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com/openai/v1)",
|
||||
)
|
||||
api_version: str | None = Field(
|
||||
default_factory=lambda: os.getenv("AZURE_API_VERSION"),
|
||||
|
|
@ -48,14 +49,14 @@ class AzureConfig(RemoteInferenceProviderConfig):
|
|||
def sample_run_config(
|
||||
cls,
|
||||
api_key: str = "${env.AZURE_API_KEY:=}",
|
||||
api_base: str = "${env.AZURE_API_BASE:=}",
|
||||
base_url: str = "${env.AZURE_API_BASE:=}",
|
||||
api_version: str = "${env.AZURE_API_VERSION:=}",
|
||||
api_type: str = "${env.AZURE_API_TYPE:=}",
|
||||
**kwargs,
|
||||
) -> dict[str, Any]:
|
||||
return {
|
||||
"api_key": api_key,
|
||||
"api_base": api_base,
|
||||
"base_url": base_url,
|
||||
"api_version": api_version,
|
||||
"api_type": api_type,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ class BedrockInferenceAdapter(OpenAIMixin):
|
|||
"""
|
||||
|
||||
config: BedrockConfig
|
||||
provider_data_api_key_field: str = "aws_bedrock_api_key"
|
||||
provider_data_api_key_field: str = "aws_bearer_token_bedrock"
|
||||
|
||||
def get_base_url(self) -> str:
|
||||
"""Get base URL for OpenAI client."""
|
||||
|
|
@ -111,7 +111,7 @@ class BedrockInferenceAdapter(OpenAIMixin):
|
|||
logger.error(f"AWS Bedrock authentication token expired: {error_msg}")
|
||||
raise ValueError(
|
||||
"AWS Bedrock authentication failed: Bearer token has expired. "
|
||||
"The AWS_BEDROCK_API_KEY environment variable contains an expired pre-signed URL. "
|
||||
"The AWS_BEARER_TOKEN_BEDROCK environment variable contains an expired pre-signed URL. "
|
||||
"Please refresh your token by generating a new pre-signed URL with AWS credentials. "
|
||||
"Refer to AWS Bedrock documentation for details on OpenAI-compatible endpoints."
|
||||
) from e
|
||||
|
|
|
|||
|
|
@ -12,9 +12,9 @@ from llama_stack.providers.utils.inference.model_registry import RemoteInference
|
|||
|
||||
|
||||
class BedrockProviderDataValidator(BaseModel):
|
||||
aws_bedrock_api_key: str | None = Field(
|
||||
aws_bearer_token_bedrock: str | None = Field(
|
||||
default=None,
|
||||
description="API key for Amazon Bedrock",
|
||||
description="API Key (Bearer token) for Amazon Bedrock",
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -27,6 +27,6 @@ class BedrockConfig(RemoteInferenceProviderConfig):
|
|||
@classmethod
|
||||
def sample_run_config(cls, **kwargs):
|
||||
return {
|
||||
"api_key": "${env.AWS_BEDROCK_API_KEY:=}",
|
||||
"api_key": "${env.AWS_BEARER_TOKEN_BEDROCK:=}",
|
||||
"region_name": "${env.AWS_DEFAULT_REGION:=us-east-2}",
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,8 +4,6 @@
|
|||
# This source code is licensed under the terms described in the LICENSE file in
|
||||
# the root directory of this source tree.
|
||||
|
||||
from urllib.parse import urljoin
|
||||
|
||||
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
|
||||
from llama_stack_api import (
|
||||
OpenAIEmbeddingsRequestWithExtraBody,
|
||||
|
|
@ -21,7 +19,7 @@ class CerebrasInferenceAdapter(OpenAIMixin):
|
|||
provider_data_api_key_field: str = "cerebras_api_key"
|
||||
|
||||
def get_base_url(self) -> str:
|
||||
return urljoin(self.config.base_url, "v1")
|
||||
return str(self.config.base_url)
|
||||
|
||||
async def openai_embeddings(
|
||||
self,
|
||||
|
|
|
|||
|
|
@ -7,12 +7,12 @@
|
|||
import os
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
from pydantic import BaseModel, Field, HttpUrl
|
||||
|
||||
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
|
||||
from llama_stack_api import json_schema_type
|
||||
|
||||
DEFAULT_BASE_URL = "https://api.cerebras.ai"
|
||||
DEFAULT_BASE_URL = "https://api.cerebras.ai/v1"
|
||||
|
||||
|
||||
class CerebrasProviderDataValidator(BaseModel):
|
||||
|
|
@ -24,8 +24,8 @@ class CerebrasProviderDataValidator(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class CerebrasImplConfig(RemoteInferenceProviderConfig):
|
||||
base_url: str = Field(
|
||||
default=os.environ.get("CEREBRAS_BASE_URL", DEFAULT_BASE_URL),
|
||||
base_url: HttpUrl | None = Field(
|
||||
default=HttpUrl(os.environ.get("CEREBRAS_BASE_URL", DEFAULT_BASE_URL)),
|
||||
description="Base URL for the Cerebras API",
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
|
||||
from typing import Any
|
||||
|
||||
from pydantic import BaseModel, Field, SecretStr
|
||||
from pydantic import BaseModel, Field, HttpUrl, SecretStr
|
||||
|
||||
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
|
||||
from llama_stack_api import json_schema_type
|
||||
|
|
@ -21,9 +21,9 @@ class DatabricksProviderDataValidator(BaseModel):
|
|||
|
||||
@json_schema_type
|
||||
class DatabricksImplConfig(RemoteInferenceProviderConfig):
|
||||
url: str | None = Field(
|
||||
base_url: HttpUrl | None = Field(
|
||||
default=None,
|
||||
description="The URL for the Databricks model serving endpoint",
|
||||
description="The URL for the Databricks model serving endpoint (should include /serving-endpoints path)",
|
||||
)
|
||||
auth_credential: SecretStr | None = Field(
|
||||
default=None,
|
||||
|
|
@ -34,11 +34,11 @@ class DatabricksImplConfig(RemoteInferenceProviderConfig):
|
|||
@classmethod
|
||||
def sample_run_config(
|
||||
cls,
|
||||
url: str = "${env.DATABRICKS_HOST:=}",
|
||||
base_url: str = "${env.DATABRICKS_HOST:=}",
|
||||
api_token: str = "${env.DATABRICKS_TOKEN:=}",
|
||||
**kwargs: Any,
|
||||
) -> dict[str, Any]:
|
||||
return {
|
||||
"url": url,
|
||||
"base_url": base_url,
|
||||
"api_token": api_token,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -29,15 +29,21 @@ class DatabricksInferenceAdapter(OpenAIMixin):
|
|||
}
|
||||
|
||||
def get_base_url(self) -> str:
|
||||
return f"{self.config.url}/serving-endpoints"
|
||||
return str(self.config.base_url)
|
||||
|
||||
async def list_provider_model_ids(self) -> Iterable[str]:
|
||||
# Filter out None values from endpoint names
|
||||
api_token = self._get_api_key_from_config_or_provider_data()
|
||||
# WorkspaceClient expects base host without /serving-endpoints suffix
|
||||
base_url_str = str(self.config.base_url)
|
||||
if base_url_str.endswith("/serving-endpoints"):
|
||||
host = base_url_str[:-18] # Remove '/serving-endpoints'
|
||||
else:
|
||||
host = base_url_str
|
||||
return [
|
||||
endpoint.name # type: ignore[misc]
|
||||
for endpoint in WorkspaceClient(
|
||||
host=self.config.url, token=api_token
|
||||
host=host, token=api_token
|
||||
).serving_endpoints.list() # TODO: this is not async
|
||||
]
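The suffix handling reduces to the following (hypothetical workspace URL; len("/serving-endpoints") is 18, which is where the magic number above comes from):

base_url = "https://dbc-123.cloud.databricks.com/serving-endpoints"
suffix = "/serving-endpoints"
host = base_url[: -len(suffix)] if base_url.endswith(suffix) else base_url
# host == "https://dbc-123.cloud.databricks.com"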
|
||||
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
|
||||
from typing import Any
|
||||
|
||||
from pydantic import Field
|
||||
from pydantic import Field, HttpUrl
|
||||
|
||||
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
|
||||
from llama_stack_api import json_schema_type
|
||||
|
|
@ -14,14 +14,14 @@ from llama_stack_api import json_schema_type
|
|||
|
||||
@json_schema_type
|
||||
class FireworksImplConfig(RemoteInferenceProviderConfig):
|
||||
url: str = Field(
|
||||
default="https://api.fireworks.ai/inference/v1",
|
||||
base_url: HttpUrl | None = Field(
|
||||
default=HttpUrl("https://api.fireworks.ai/inference/v1"),
|
||||
description="The URL for the Fireworks server",
|
||||
)
|
||||
|
||||
@classmethod
|
||||
def sample_run_config(cls, api_key: str = "${env.FIREWORKS_API_KEY:=}", **kwargs) -> dict[str, Any]:
|
||||
return {
|
||||
"url": "https://api.fireworks.ai/inference/v1",
|
||||
"base_url": "https://api.fireworks.ai/inference/v1",
|
||||
"api_key": api_key,
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,4 +24,4 @@ class FireworksInferenceAdapter(OpenAIMixin):
|
|||
provider_data_api_key_field: str = "fireworks_api_key"
|
||||
|
||||
def get_base_url(self) -> str:
|
||||
return "https://api.fireworks.ai/inference/v1"
|
||||
return str(self.config.base_url)
|
||||
|
|
|
|||
|
|
@@ -6,7 +6,7 @@

 from typing import Any

-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, HttpUrl

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack_api import json_schema_type

@@ -21,14 +21,14 @@ class GroqProviderDataValidator(BaseModel):

 @json_schema_type
 class GroqConfig(RemoteInferenceProviderConfig):
-    url: str = Field(
-        default="https://api.groq.com",
+    base_url: HttpUrl | None = Field(
+        default=HttpUrl("https://api.groq.com/openai/v1"),
         description="The URL for the Groq AI server",
     )

     @classmethod
     def sample_run_config(cls, api_key: str = "${env.GROQ_API_KEY:=}", **kwargs) -> dict[str, Any]:
         return {
-            "url": "https://api.groq.com",
+            "base_url": "https://api.groq.com/openai/v1",
             "api_key": api_key,
         }

@@ -15,4 +15,4 @@ class GroqInferenceAdapter(OpenAIMixin):
     provider_data_api_key_field: str = "groq_api_key"

     def get_base_url(self) -> str:
-        return f"{self.config.url}/openai/v1"
+        return str(self.config.base_url)
@@ -6,7 +6,7 @@

 from typing import Any

-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, HttpUrl

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack_api import json_schema_type

@@ -21,14 +21,14 @@ class LlamaProviderDataValidator(BaseModel):

 @json_schema_type
 class LlamaCompatConfig(RemoteInferenceProviderConfig):
-    openai_compat_api_base: str = Field(
-        default="https://api.llama.com/compat/v1/",
+    base_url: HttpUrl | None = Field(
+        default=HttpUrl("https://api.llama.com/compat/v1/"),
         description="The URL for the Llama API server",
     )

     @classmethod
     def sample_run_config(cls, api_key: str = "${env.LLAMA_API_KEY}", **kwargs) -> dict[str, Any]:
         return {
-            "openai_compat_api_base": "https://api.llama.com/compat/v1/",
+            "base_url": "https://api.llama.com/compat/v1/",
             "api_key": api_key,
         }

@@ -31,7 +31,7 @@ class LlamaCompatInferenceAdapter(OpenAIMixin):

         :return: The Llama API base URL
         """
-        return self.config.openai_compat_api_base
+        return str(self.config.base_url)

     async def openai_completion(
         self,
@@ -7,7 +7,7 @@
 import os
 from typing import Any

-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, HttpUrl

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack_api import json_schema_type

@@ -44,18 +44,14 @@ class NVIDIAConfig(RemoteInferenceProviderConfig):
     URL of your running NVIDIA NIM and do not need to set the api_key.
     """

-    url: str = Field(
-        default_factory=lambda: os.getenv("NVIDIA_BASE_URL", "https://integrate.api.nvidia.com"),
+    base_url: HttpUrl | None = Field(
+        default_factory=lambda: os.getenv("NVIDIA_BASE_URL", "https://integrate.api.nvidia.com/v1"),
         description="A base url for accessing the NVIDIA NIM",
     )
     timeout: int = Field(
         default=60,
         description="Timeout for the HTTP requests",
     )
-    append_api_version: bool = Field(
-        default_factory=lambda: os.getenv("NVIDIA_APPEND_API_VERSION", "True").lower() != "false",
-        description="When set to false, the API version will not be appended to the base_url. By default, it is true.",
-    )
     rerank_model_to_url: dict[str, str] = Field(
         default_factory=lambda: {
             "nv-rerank-qa-mistral-4b:1": "https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking",

@@ -68,13 +64,11 @@ class NVIDIAConfig(RemoteInferenceProviderConfig):
     @classmethod
     def sample_run_config(
         cls,
-        url: str = "${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}",
+        base_url: HttpUrl | None = "${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}",
         api_key: str = "${env.NVIDIA_API_KEY:=}",
-        append_api_version: bool = "${env.NVIDIA_APPEND_API_VERSION:=True}",
         **kwargs,
     ) -> dict[str, Any]:
         return {
-            "url": url,
+            "base_url": base_url,
             "api_key": api_key,
-            "append_api_version": append_api_version,
         }

@@ -44,7 +44,7 @@ class NVIDIAInferenceAdapter(OpenAIMixin):
     }

     async def initialize(self) -> None:
-        logger.info(f"Initializing NVIDIAInferenceAdapter({self.config.url})...")
+        logger.info(f"Initializing NVIDIAInferenceAdapter({self.config.base_url})...")

         if _is_nvidia_hosted(self.config):
             if not self.config.auth_credential:

@@ -72,7 +72,7 @@ class NVIDIAInferenceAdapter(OpenAIMixin):

         :return: The NVIDIA API base URL
         """
-        return f"{self.config.url}/v1" if self.config.append_api_version else self.config.url
+        return str(self.config.base_url)

     async def list_provider_model_ids(self) -> Iterable[str]:
         """

@@ -8,4 +8,4 @@ from . import NVIDIAConfig


 def _is_nvidia_hosted(config: NVIDIAConfig) -> bool:
-    return "integrate.api.nvidia.com" in config.url
+    return "integrate.api.nvidia.com" in str(config.base_url)
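With `append_api_version` removed, the `/v1` segment now has to live in `base_url` itself: the default carries it, and self-hosted NIM deployments that set `NVIDIA_BASE_URL` must include it as well. A small sketch of how the new default factory resolves; the localhost value is only an illustration of a self-hosted NIM.

```python
# Sketch only: how the NVIDIA base_url default_factory resolves from the environment.
# Note that /v1 must now be part of the value, since get_base_url() no longer appends it.
import os


def nvidia_base_url_default() -> str:
    return os.getenv("NVIDIA_BASE_URL", "https://integrate.api.nvidia.com/v1")


os.environ.pop("NVIDIA_BASE_URL", None)
print(nvidia_base_url_default())  # https://integrate.api.nvidia.com/v1

os.environ["NVIDIA_BASE_URL"] = "http://localhost:8000/v1"  # hypothetical self-hosted NIM
print(nvidia_base_url_default())  # http://localhost:8000/v1
```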
@@ -6,20 +6,22 @@

 from typing import Any

-from pydantic import Field, SecretStr
+from pydantic import Field, HttpUrl, SecretStr

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig

-DEFAULT_OLLAMA_URL = "http://localhost:11434"
+DEFAULT_OLLAMA_URL = "http://localhost:11434/v1"


 class OllamaImplConfig(RemoteInferenceProviderConfig):
     auth_credential: SecretStr | None = Field(default=None, exclude=True)

-    url: str = DEFAULT_OLLAMA_URL
+    base_url: HttpUrl | None = Field(default=HttpUrl(DEFAULT_OLLAMA_URL))

     @classmethod
-    def sample_run_config(cls, url: str = "${env.OLLAMA_URL:=http://localhost:11434}", **kwargs) -> dict[str, Any]:
+    def sample_run_config(
+        cls, base_url: str = "${env.OLLAMA_URL:=http://localhost:11434/v1}", **kwargs
+    ) -> dict[str, Any]:
         return {
-            "url": url,
+            "base_url": base_url,
         }

@@ -55,17 +55,23 @@ class OllamaInferenceAdapter(OpenAIMixin):
         # ollama client attaches itself to the current event loop (sadly?)
         loop = asyncio.get_running_loop()
         if loop not in self._clients:
-            self._clients[loop] = AsyncOllamaClient(host=self.config.url)
+            # Ollama client expects base URL without /v1 suffix
+            base_url_str = str(self.config.base_url)
+            if base_url_str.endswith("/v1"):
+                host = base_url_str[:-3]
+            else:
+                host = base_url_str
+            self._clients[loop] = AsyncOllamaClient(host=host)
         return self._clients[loop]

     def get_api_key(self):
         return "NO KEY REQUIRED"

     def get_base_url(self):
-        return self.config.url.rstrip("/") + "/v1"
+        return str(self.config.base_url)

     async def initialize(self) -> None:
-        logger.info(f"checking connectivity to Ollama at `{self.config.url}`...")
+        logger.info(f"checking connectivity to Ollama at `{self.config.base_url}`...")
         r = await self.health()
         if r["status"] == HealthStatus.ERROR:
             logger.warning(
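The Ollama adapter now derives two URL shapes from a single `base_url`: the OpenAI-compatible endpoint keeps the `/v1` path, while the native `AsyncOllamaClient` wants the bare host. A hedged sketch of that split; `config_base_url` is a stand-in for `OllamaImplConfig.base_url`, not code from this diff.

```python
# Stand-in value, not from the diff: one configured base_url, two consumers.
config_base_url = "http://localhost:11434/v1"

openai_compatible_url = config_base_url                   # what get_base_url() returns
native_client_host = config_base_url.removesuffix("/v1")  # what AsyncOllamaClient(host=...) expects

print(openai_compatible_url)  # http://localhost:11434/v1
print(native_client_host)     # http://localhost:11434
```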
@@ -6,7 +6,7 @@

 from typing import Any

-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, HttpUrl

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack_api import json_schema_type

@@ -21,8 +21,8 @@ class OpenAIProviderDataValidator(BaseModel):

 @json_schema_type
 class OpenAIConfig(RemoteInferenceProviderConfig):
-    base_url: str = Field(
-        default="https://api.openai.com/v1",
+    base_url: HttpUrl | None = Field(
+        default=HttpUrl("https://api.openai.com/v1"),
         description="Base URL for OpenAI API",
     )

@@ -35,4 +35,4 @@ class OpenAIInferenceAdapter(OpenAIMixin):

         Returns the OpenAI API base URL from the configuration.
         """
-        return self.config.base_url
+        return str(self.config.base_url)
@@ -6,7 +6,7 @@

 from typing import Any

-from pydantic import Field
+from pydantic import Field, HttpUrl

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack_api import json_schema_type

@@ -14,16 +14,16 @@ from llama_stack_api import json_schema_type

 @json_schema_type
 class PassthroughImplConfig(RemoteInferenceProviderConfig):
-    url: str = Field(
+    base_url: HttpUrl | None = Field(
         default=None,
         description="The URL for the passthrough endpoint",
     )

     @classmethod
     def sample_run_config(
-        cls, url: str = "${env.PASSTHROUGH_URL}", api_key: str = "${env.PASSTHROUGH_API_KEY}", **kwargs
+        cls, base_url: HttpUrl | None = "${env.PASSTHROUGH_URL}", api_key: str = "${env.PASSTHROUGH_API_KEY}", **kwargs
     ) -> dict[str, Any]:
         return {
-            "url": url,
+            "base_url": base_url,
             "api_key": api_key,
         }

@@ -82,8 +82,8 @@ class PassthroughInferenceAdapter(NeedsRequestProviderData, Inference):

     def _get_passthrough_url(self) -> str:
         """Get the passthrough URL from config or provider data."""
-        if self.config.url is not None:
-            return self.config.url
+        if self.config.base_url is not None:
+            return str(self.config.base_url)

         provider_data = self.get_request_provider_data()
         if provider_data is None:
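`_get_passthrough_url` keeps its fallback order: a statically configured `base_url` wins, otherwise the per-request provider data supplies the endpoint. A minimal sketch of that resolution; `provider_data` and its `passthrough_url` attribute are hypothetical stand-ins for the request-scoped object, not names confirmed by this diff.

```python
# Sketch only: config-first, provider-data-second URL resolution.
from pydantic import HttpUrl


def resolve_passthrough_url(config_base_url: HttpUrl | None, provider_data) -> str:
    if config_base_url is not None:
        return str(config_base_url)
    # `passthrough_url` is a hypothetical attribute name used for illustration.
    url = getattr(provider_data, "passthrough_url", None) if provider_data else None
    if not url:
        raise ValueError("Pass-through URL must be set in config or request provider data")
    return url
```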
@@ -6,7 +6,7 @@

 from typing import Any

-from pydantic import BaseModel, Field, SecretStr
+from pydantic import BaseModel, Field, HttpUrl, SecretStr

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack_api import json_schema_type

@@ -21,7 +21,7 @@ class RunpodProviderDataValidator(BaseModel):

 @json_schema_type
 class RunpodImplConfig(RemoteInferenceProviderConfig):
-    url: str | None = Field(
+    base_url: HttpUrl | None = Field(
         default=None,
         description="The URL for the Runpod model serving endpoint",
     )

@@ -34,6 +34,6 @@ class RunpodImplConfig(RemoteInferenceProviderConfig):
     @classmethod
     def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]:
         return {
-            "url": "${env.RUNPOD_URL:=}",
+            "base_url": "${env.RUNPOD_URL:=}",
             "api_token": "${env.RUNPOD_API_TOKEN}",
         }

@@ -28,7 +28,7 @@ class RunpodInferenceAdapter(OpenAIMixin):

     def get_base_url(self) -> str:
         """Get base URL for OpenAI client."""
-        return self.config.url
+        return str(self.config.base_url)

     async def openai_chat_completion(
         self,
@@ -6,7 +6,7 @@

 from typing import Any

-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, HttpUrl

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack_api import json_schema_type

@@ -21,14 +21,14 @@ class SambaNovaProviderDataValidator(BaseModel):

 @json_schema_type
 class SambaNovaImplConfig(RemoteInferenceProviderConfig):
-    url: str = Field(
-        default="https://api.sambanova.ai/v1",
+    base_url: HttpUrl | None = Field(
+        default=HttpUrl("https://api.sambanova.ai/v1"),
         description="The URL for the SambaNova AI server",
     )

     @classmethod
     def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY:=}", **kwargs) -> dict[str, Any]:
         return {
-            "url": "https://api.sambanova.ai/v1",
+            "base_url": "https://api.sambanova.ai/v1",
             "api_key": api_key,
         }

@@ -25,4 +25,4 @@ class SambaNovaInferenceAdapter(OpenAIMixin):

         :return: The SambaNova base URL
         """
-        return self.config.url
+        return str(self.config.base_url)
@@ -5,7 +5,7 @@
 # the root directory of this source tree.


-from pydantic import BaseModel, Field, SecretStr
+from pydantic import BaseModel, Field, HttpUrl, SecretStr

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack_api import json_schema_type

@@ -15,18 +15,19 @@ from llama_stack_api import json_schema_type
 class TGIImplConfig(RemoteInferenceProviderConfig):
     auth_credential: SecretStr | None = Field(default=None, exclude=True)

-    url: str = Field(
-        description="The URL for the TGI serving endpoint",
+    base_url: HttpUrl | None = Field(
+        default=None,
+        description="The URL for the TGI serving endpoint (should include /v1 path)",
     )

     @classmethod
     def sample_run_config(
         cls,
-        url: str = "${env.TGI_URL:=}",
+        base_url: str = "${env.TGI_URL:=}",
         **kwargs,
     ):
         return {
-            "url": url,
+            "base_url": base_url,
         }


@@ -8,7 +8,7 @@
 from collections.abc import Iterable

 from huggingface_hub import AsyncInferenceClient, HfApi
-from pydantic import SecretStr
+from pydantic import HttpUrl, SecretStr

 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

@@ -23,7 +23,7 @@ log = get_logger(name=__name__, category="inference::tgi")


 class _HfAdapter(OpenAIMixin):
-    url: str
+    base_url: HttpUrl
     api_key: SecretStr

     hf_client: AsyncInferenceClient

@@ -36,7 +36,7 @@ class _HfAdapter(OpenAIMixin):
         return "NO KEY REQUIRED"

     def get_base_url(self):
-        return self.url
+        return self.base_url

     async def list_provider_model_ids(self) -> Iterable[str]:
         return [self.model_id]

@@ -50,14 +50,20 @@ class _HfAdapter(OpenAIMixin):

 class TGIAdapter(_HfAdapter):
     async def initialize(self, config: TGIImplConfig) -> None:
-        if not config.url:
+        if not config.base_url:
             raise ValueError("You must provide a URL in run.yaml (or via the TGI_URL environment variable) to use TGI.")
-        log.info(f"Initializing TGI client with url={config.url}")
-        self.hf_client = AsyncInferenceClient(model=config.url, provider="hf-inference")
+        log.info(f"Initializing TGI client with url={config.base_url}")
+        # Extract base URL without /v1 for HF client initialization
+        base_url_str = str(config.base_url).rstrip("/")
+        if base_url_str.endswith("/v1"):
+            base_url_for_client = base_url_str[:-3]
+        else:
+            base_url_for_client = base_url_str
+        self.hf_client = AsyncInferenceClient(model=base_url_for_client, provider="hf-inference")
         endpoint_info = await self.hf_client.get_endpoint_info()
         self.max_tokens = endpoint_info["max_total_tokens"]
         self.model_id = endpoint_info["model_id"]
-        self.url = f"{config.url.rstrip('/')}/v1"
+        self.base_url = config.base_url
         self.api_key = SecretStr("NO_KEY")
@@ -6,7 +6,7 @@

 from typing import Any

-from pydantic import Field
+from pydantic import Field, HttpUrl

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack_api import json_schema_type

@@ -14,14 +14,14 @@ from llama_stack_api import json_schema_type

 @json_schema_type
 class TogetherImplConfig(RemoteInferenceProviderConfig):
-    url: str = Field(
-        default="https://api.together.xyz/v1",
+    base_url: HttpUrl | None = Field(
+        default=HttpUrl("https://api.together.xyz/v1"),
         description="The URL for the Together AI server",
     )

     @classmethod
     def sample_run_config(cls, **kwargs) -> dict[str, Any]:
         return {
-            "url": "https://api.together.xyz/v1",
+            "base_url": "https://api.together.xyz/v1",
             "api_key": "${env.TOGETHER_API_KEY:=}",
         }

@@ -9,7 +9,6 @@ from collections.abc import Iterable
 from typing import Any, cast

 from together import AsyncTogether  # type: ignore[import-untyped]
-from together.constants import BASE_URL  # type: ignore[import-untyped]

 from llama_stack.core.request_headers import NeedsRequestProviderData
 from llama_stack.log import get_logger

@@ -42,7 +41,7 @@ class TogetherInferenceAdapter(OpenAIMixin, NeedsRequestProviderData):
     provider_data_api_key_field: str = "together_api_key"

     def get_base_url(self):
-        return BASE_URL
+        return str(self.config.base_url)

     def _get_client(self) -> AsyncTogether:
         together_api_key = None
@@ -51,4 +51,4 @@ class VertexAIInferenceAdapter(OpenAIMixin):

         :return: An iterable of model IDs
         """
-        return ["vertexai/gemini-2.0-flash", "vertexai/gemini-2.5-flash", "vertexai/gemini-2.5-pro"]
+        return ["google/gemini-2.0-flash", "google/gemini-2.5-flash", "google/gemini-2.5-pro"]
@@ -6,7 +6,7 @@

 from pathlib import Path

-from pydantic import Field, SecretStr, field_validator
+from pydantic import Field, HttpUrl, SecretStr, field_validator

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack_api import json_schema_type

@@ -14,7 +14,7 @@ from llama_stack_api import json_schema_type

 @json_schema_type
 class VLLMInferenceAdapterConfig(RemoteInferenceProviderConfig):
-    url: str | None = Field(
+    base_url: HttpUrl | None = Field(
         default=None,
         description="The URL for the vLLM model serving endpoint",
     )

@@ -48,11 +48,11 @@ class VLLMInferenceAdapterConfig(RemoteInferenceProviderConfig):
     @classmethod
     def sample_run_config(
         cls,
-        url: str = "${env.VLLM_URL:=}",
+        base_url: str = "${env.VLLM_URL:=}",
         **kwargs,
     ):
         return {
-            "url": url,
+            "base_url": base_url,
             "max_tokens": "${env.VLLM_MAX_TOKENS:=4096}",
             "api_token": "${env.VLLM_API_TOKEN:=fake}",
             "tls_verify": "${env.VLLM_TLS_VERIFY:=true}",

@@ -39,12 +39,12 @@ class VLLMInferenceAdapter(OpenAIMixin):

     def get_base_url(self) -> str:
         """Get the base URL from config."""
-        if not self.config.url:
+        if not self.config.base_url:
             raise ValueError("No base URL configured")
-        return self.config.url
+        return str(self.config.base_url)

     async def initialize(self) -> None:
-        if not self.config.url:
+        if not self.config.base_url:
             raise ValueError(
                 "You must provide a URL in run.yaml (or via the VLLM_URL environment variable) to use vLLM."
             )
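Providers with no usable default, such as vLLM and TGI, keep a guard that fails fast when `base_url` is unset; the only change is that the configured value is an `HttpUrl` and is converted with `str()` before it reaches the OpenAI client. A standalone sketch of that guard, not the adapter's actual method; `resolve_base_url` and the localhost URL are illustrative stand-ins.

```python
# Sketch of the guard pattern; `base_url` stands in for VLLMInferenceAdapterConfig.base_url.
from pydantic import HttpUrl


def resolve_base_url(base_url: HttpUrl | None) -> str:
    if not base_url:
        raise ValueError("No base URL configured")
    return str(base_url)


print(resolve_base_url(HttpUrl("http://localhost:8000/v1")))  # prints the configured URL
```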
Some files were not shown because too many files have changed in this diff.