Merge branch 'main' into fix/issue-3185

commit 9b3c041af0
Roy Belio, 2025-11-26 10:58:20 +02:00, committed by GitHub
No known key found for this signature in database. GPG key ID: B5690EEEBB952194
139 changed files with 16,350 additions and 825 deletions

.github/CODEOWNERS (vendored)

@@ -2,4 +2,4 @@
 # These owners will be the default owners for everything in
 # the repo. Unless a later match takes precedence,
-* @ashwinb @yanxi0830 @hardikjshah @raghotham @ehhuang @leseb @bbrowning @reluctantfuturist @mattf @slekkala1 @franciscojavierarceo
+* @ashwinb @raghotham @ehhuang @leseb @bbrowning @mattf @franciscojavierarceo @cdoern

@@ -0,0 +1,35 @@
+name: Setup TypeScript client
+description: Conditionally checkout and link llama-stack-client-typescript based on client-version
+inputs:
+  client-version:
+    description: 'Client version (latest or published)'
+    required: true
+outputs:
+  ts-client-path:
+    description: 'Path or version to use for TypeScript client'
+    value: ${{ steps.set-path.outputs.ts-client-path }}
+runs:
+  using: "composite"
+  steps:
+    - name: Checkout TypeScript client (latest)
+      if: ${{ inputs.client-version == 'latest' }}
+      uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+      with:
+        repository: llamastack/llama-stack-client-typescript
+        ref: main
+        path: .ts-client-checkout
+    - name: Set TS_CLIENT_PATH
+      id: set-path
+      shell: bash
+      run: |
+        if [ "${{ inputs.client-version }}" = "latest" ]; then
+          echo "ts-client-path=${{ github.workspace }}/.ts-client-checkout" >> $GITHUB_OUTPUT
+        elif [ "${{ inputs.client-version }}" = "published" ]; then
+          echo "ts-client-path=^0.3.2" >> $GITHUB_OUTPUT
+        else
+          echo "::error::Invalid client-version: ${{ inputs.client-version }}"
+          exit 1
+        fi
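For reference, this is roughly how a workflow job consumes the new composite action (it mirrors the integration-test workflow changes later in this diff; the final echo step is illustrative only):

    - name: Setup TypeScript client
      id: setup-ts-client
      uses: ./.github/actions/setup-typescript-client
      with:
        client-version: latest
    - name: Show resolved client path
      run: echo "TS client path or version -> ${{ steps.setup-ts-client.outputs.ts-client-path }}"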

@@ -27,7 +27,7 @@ jobs:
 steps:
 - name: Checkout PR branch
-uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
 with:
 fetch-depth: 0 # Need full history to access main branch
@@ -37,7 +37,7 @@ jobs:
 python-version: '3.12'
 - name: Install uv
-uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
+uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4
 with:
 enable-cache: true
@@ -151,7 +151,7 @@ jobs:
 steps:
 - name: Checkout PR branch
-uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
 with:
 fetch-depth: 0
@@ -236,7 +236,7 @@ jobs:
 steps:
 - name: Checkout PR branch
-uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
 with:
 fetch-depth: 0
@@ -405,7 +405,7 @@ jobs:
 steps:
 - name: Checkout PR branch
-uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
 with:
 fetch-depth: 0
@@ -415,7 +415,7 @@ jobs:
 python-version: '3.12'
 - name: Install uv
-uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
+uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4
 with:
 enable-cache: true

@@ -17,13 +17,13 @@ jobs:
 pull-requests: write # for peter-evans/create-pull-request to create a PR
 runs-on: ubuntu-latest
 steps:
-- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+- uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
 with:
 ref: main
 fetch-depth: 0
 - run: |
 python ./scripts/gen-changelog.py
-- uses: peter-evans/create-pull-request@271a8d0340265f705b14b6d32b9829c1cb33d45e # v7.0.8
+- uses: peter-evans/create-pull-request@84ae59a2cdc2258d6fa0732dd66352dddae2a412 # v7.0.9
 with:
 title: 'docs: update CHANGELOG.md for ${{ github.ref_name }}'
 commit-message: 'docs: update CHANGELOG.md for ${{ github.ref_name }}'

@@ -35,7 +35,7 @@ jobs:
 runs-on: ubuntu-latest
 steps:
 - name: Checkout PR Code
-uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
 with:
 fetch-depth: 0
@@ -59,7 +59,7 @@ jobs:
 # This allows us to diff the current changes against the previous state
 - name: Checkout Base Branch
 if: steps.skip-check.outputs.skip != 'true'
-uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
 with:
 ref: ${{ github.event.pull_request.base.ref }}
 path: 'base'

@@ -16,14 +16,14 @@ jobs:
 lint:
 runs-on: ubuntu-latest
 steps:
-- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # 5.0.0
+- uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # 6.0.0
 - name: Run ShellCheck on install.sh
 run: shellcheck scripts/install.sh
 smoke-test-on-dev:
 runs-on: ubuntu-latest
 steps:
 - name: Checkout repository
-uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
 - name: Install dependencies
 uses: ./.github/actions/setup-runner

@@ -35,7 +35,7 @@ jobs:
 steps:
 - name: Checkout repository
-uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
 - name: Install dependencies
 uses: ./.github/actions/setup-runner

@@ -48,7 +48,7 @@ jobs:
 steps:
 - name: Checkout repository
-uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
 - name: Install dependencies
 uses: ./.github/actions/setup-runner

@@ -50,7 +50,7 @@ jobs:
 matrix: ${{ steps.set-matrix.outputs.matrix }}
 steps:
 - name: Checkout repository
-uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
 - name: Generate test matrix
 id: set-matrix
@@ -81,7 +81,7 @@ jobs:
 steps:
 - name: Checkout repository
-uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
 - name: Setup test environment
 if: ${{ matrix.config.allowed_clients == null || contains(matrix.config.allowed_clients, matrix.client) }}
@@ -93,11 +93,27 @@ jobs:
 suite: ${{ matrix.config.suite }}
 inference-mode: 'replay'
+- name: Setup Node.js for TypeScript client tests
+  if: ${{ matrix.client == 'server' }}
+  uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0
+  with:
+    node-version: '20'
+    cache: 'npm'
+    cache-dependency-path: tests/integration/client-typescript/package-lock.json
+- name: Setup TypeScript client
+  if: ${{ matrix.client == 'server' }}
+  id: setup-ts-client
+  uses: ./.github/actions/setup-typescript-client
+  with:
+    client-version: ${{ matrix.client-version }}
 - name: Run tests
 if: ${{ matrix.config.allowed_clients == null || contains(matrix.config.allowed_clients, matrix.client) }}
 uses: ./.github/actions/run-and-record-tests
 env:
 OPENAI_API_KEY: dummy
+TS_CLIENT_PATH: ${{ steps.setup-ts-client.outputs.ts-client-path || '' }}
 with:
 stack-config: >-
 ${{ matrix.config.stack_config

@@ -37,7 +37,7 @@ jobs:
 steps:
 - name: Checkout repository
-uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
 - name: Install dependencies
 uses: ./.github/actions/setup-runner

@@ -22,7 +22,7 @@ jobs:
 steps:
 - name: Checkout code
-uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
 with:
 # For dependabot PRs, we need to checkout with a token that can push changes
 token: ${{ github.actor == 'dependabot[bot]' && secrets.GITHUB_TOKEN || github.token }}
@@ -46,7 +46,7 @@ jobs:
 cache-dependency-path: 'src/llama_stack_ui/'
 - name: Set up uv
-uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
+uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4
 - name: Install npm dependencies
 run: npm ci

@@ -40,7 +40,7 @@ jobs:
 distros: ${{ steps.set-matrix.outputs.distros }}
 steps:
 - name: Checkout repository
-uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
 - name: Generate Distribution List
 id: set-matrix
@@ -59,7 +59,7 @@ jobs:
 steps:
 - name: Checkout repository
-uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
 - name: Install dependencies
 uses: ./.github/actions/setup-runner
@@ -93,7 +93,7 @@ jobs:
 runs-on: ubuntu-latest
 steps:
 - name: Checkout repository
-uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
 - name: Install dependencies
 uses: ./.github/actions/setup-runner
@@ -106,7 +106,7 @@ jobs:
 runs-on: ubuntu-latest
 steps:
 - name: Checkout repository
-uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
 - name: Install dependencies
 uses: ./.github/actions/setup-runner
@@ -146,7 +146,7 @@ jobs:
 runs-on: ubuntu-latest
 steps:
 - name: Checkout repository
-uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
 - name: Install dependencies
 uses: ./.github/actions/setup-runner

@@ -36,7 +36,7 @@ jobs:
 distros: ${{ steps.set-matrix.outputs.distros }}
 steps:
 - name: Checkout repository
-uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
 - name: Generate Distribution List
 id: set-matrix
@@ -55,7 +55,7 @@ jobs:
 steps:
 - name: Checkout repository
-uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
 - name: Install dependencies
 uses: ./.github/actions/setup-runner
@@ -79,7 +79,7 @@ jobs:
 runs-on: ubuntu-latest
 steps:
 - name: Checkout repository
-uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
 - name: Install dependencies
 uses: ./.github/actions/setup-runner
@@ -92,7 +92,7 @@ jobs:
 runs-on: ubuntu-latest
 steps:
 - name: Checkout repository
-uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
 - name: Install dependencies
 uses: ./.github/actions/setup-runner

@@ -21,10 +21,10 @@ jobs:
 steps:
 - name: Checkout repository
-uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
 - name: Install uv
-uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
+uses: astral-sh/setup-uv@1e862dfacbd1d6d858c55d9b792c756523627244 # v7.1.4
 with:
 python-version: ${{ matrix.python-version }}
 activate-environment: true

@@ -46,7 +46,7 @@ jobs:
 echo "::endgroup::"
 - name: Checkout repository
-uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
 with:
 fetch-depth: 0

@@ -43,7 +43,41 @@ env:
 # Stainless organization dashboard
 jobs:
+compute-branch:
+  runs-on: ubuntu-latest
+  outputs:
+    preview_branch: ${{ steps.compute.outputs.preview_branch }}
+    base_branch: ${{ steps.compute.outputs.base_branch }}
+    merge_branch: ${{ steps.compute.outputs.merge_branch }}
+  steps:
+    - name: Compute branch names
+      id: compute
+      run: |
+        HEAD_REPO="${{ github.event.pull_request.head.repo.full_name }}"
+        BASE_REPO="${{ github.repository }}"
+        BRANCH_NAME="${{ github.event.pull_request.head.ref }}"
+        FORK_OWNER="${{ github.event.pull_request.head.repo.owner.login }}"
+        if [ "$HEAD_REPO" != "$BASE_REPO" ]; then
+          # Fork PR: prefix with fork owner for isolation
+          if [ -z "$FORK_OWNER" ]; then
+            echo "Error: Fork PR detected but fork owner is empty" >&2
+            exit 1
+          fi
+          PREVIEW_BRANCH="preview/${FORK_OWNER}/${BRANCH_NAME}"
+          BASE_BRANCH="preview/base/${FORK_OWNER}/${BRANCH_NAME}"
+        else
+          # Same-repo PR
+          PREVIEW_BRANCH="preview/${BRANCH_NAME}"
+          BASE_BRANCH="preview/base/${BRANCH_NAME}"
+        fi
+        echo "preview_branch=${PREVIEW_BRANCH}" >> $GITHUB_OUTPUT
+        echo "base_branch=${BASE_BRANCH}" >> $GITHUB_OUTPUT
+        echo "merge_branch=${PREVIEW_BRANCH}" >> $GITHUB_OUTPUT
 preview:
+needs: compute-branch
 if: github.event.action != 'closed'
 runs-on: ubuntu-latest
 permissions:
@@ -53,16 +87,14 @@ jobs:
 # Checkout the PR's code to access the OpenAPI spec and config files.
 # This is necessary to read the spec/config from the PR (including from forks).
 - name: Checkout repository
-uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
 with:
 repository: ${{ github.event.pull_request.head.repo.full_name }}
 ref: ${{ github.event.pull_request.head.sha }}
 fetch-depth: 2
-# This action builds preview SDKs from the OpenAPI spec changes and
-# posts/updates a comment on the PR with build results and links to the preview.
 - name: Run preview builds
-uses: stainless-api/upload-openapi-spec-action/preview@32823b096b4319c53ee948d702d9052873af485f # 1.6.0
+uses: stainless-api/upload-openapi-spec-action/preview@9133735bca5ce0a1df7d3b26e75364e26137a016 # 1.7.0
 with:
 stainless_api_key: ${{ secrets.STAINLESS_API_KEY }}
 org: ${{ env.STAINLESS_ORG }}
@@ -73,8 +105,11 @@ jobs:
 base_sha: ${{ github.event.pull_request.base.sha }}
 base_ref: ${{ github.event.pull_request.base.ref }}
 head_sha: ${{ github.event.pull_request.head.sha }}
+branch: ${{ needs.compute-branch.outputs.preview_branch }}
+base_branch: ${{ needs.compute-branch.outputs.base_branch }}
 merge:
+needs: compute-branch
 if: github.event.action == 'closed' && github.event.pull_request.merged == true
 runs-on: ubuntu-latest
 permissions:
@@ -84,20 +119,20 @@ jobs:
 # Checkout the PR's code to access the OpenAPI spec and config files.
 # This is necessary to read the spec/config from the PR (including from forks).
 - name: Checkout repository
-uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
 with:
 repository: ${{ github.event.pull_request.head.repo.full_name }}
 ref: ${{ github.event.pull_request.head.sha }}
 fetch-depth: 2
 # Note that this only merges in changes that happened on the last build on
-# preview/${{ github.head_ref }}. It's possible that there are OAS/config
+# the computed preview branch. It's possible that there are OAS/config
-# changes that haven't been built, if the preview-sdk job didn't finish
+# changes that haven't been built, if the preview job didn't finish
 # before this step starts. In theory we want to wait for all builds
-# against preview/${{ github.head_ref }} to complete, but assuming that
+# against the preview branch to complete, but assuming that
-# the preview-sdk job happens before the PR merge, it should be fine.
+# the preview job happens before the PR merge, it should be fine.
 - name: Run merge build
-uses: stainless-api/upload-openapi-spec-action/merge@32823b096b4319c53ee948d702d9052873af485f # 1.6.0
+uses: stainless-api/upload-openapi-spec-action/merge@9133735bca5ce0a1df7d3b26e75364e26137a016 # 1.7.0
 with:
 stainless_api_key: ${{ secrets.STAINLESS_API_KEY }}
 org: ${{ env.STAINLESS_ORG }}
@@ -108,3 +143,4 @@ jobs:
 base_sha: ${{ github.event.pull_request.base.sha }}
 base_ref: ${{ github.event.pull_request.base.ref }}
 head_sha: ${{ github.event.pull_request.head.sha }}
+merge_branch: ${{ needs.compute-branch.outputs.merge_branch }}
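To illustrate the branch-naming scheme the new compute-branch job produces (the owner and branch names below are hypothetical):

    Fork PR from user "alice" with head branch "fix/foo":
      preview_branch = preview/alice/fix/foo
      base_branch    = preview/base/alice/fix/foo
    Same-repo PR with head branch "fix/foo":
      preview_branch = preview/fix/foo
      base_branch    = preview/base/fix/foo
    merge_branch always mirrors preview_branch.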

@@ -27,7 +27,7 @@ jobs:
 # container and point 'uv pip install' to the correct path...
 steps:
 - name: Checkout repository
-uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
 - name: Install dependencies
 uses: ./.github/actions/setup-runner

@@ -27,7 +27,7 @@ jobs:
 # container and point 'uv pip install' to the correct path...
 steps:
 - name: Checkout repository
-uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
 - name: Install dependencies
 uses: ./.github/actions/setup-runner

@@ -26,7 +26,7 @@ jobs:
 steps:
 - name: Checkout repository
-uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
 - name: Setup Node.js
 uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0

@@ -36,7 +36,7 @@ jobs:
 - "3.13"
 steps:
 - name: Checkout repository
-uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+uses: actions/checkout@1af3b93b6815bc44a9784bd300feb67ff0d1eeb3 # v6.0.0
 - name: Install dependencies
 uses: ./.github/actions/setup-runner

.gitignore (vendored)

@@ -35,3 +35,5 @@ docs/static/imported-files/
 docs/docs/api-deprecated/
 docs/docs/api-experimental/
 docs/docs/api/
+tests/integration/client-typescript/node_modules/
+.ts-client-checkout/

@@ -9862,9 +9862,21 @@ components:
 title: Object
 default: vector_store.file
 attributes:
-additionalProperties: true
+additionalProperties:
+  anyOf:
+    - type: string
+      maxLength: 512
+    - type: number
+    - type: boolean
+  title: string | number | boolean
+propertyNames:
+  type: string
+  maxLength: 64
 type: object
+maxProperties: 16
 title: Attributes
+description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers.
+x-oaiTypeLabel: map
 chunking_strategy:
 oneOf:
 - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
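To make the new constraint concrete, here is a hypothetical attributes value that satisfies this schema (at most 16 properties, key names up to 64 characters, and each value either a string of up to 512 characters, a number, or a boolean):

    attributes:
      project: llama-stack   # string value, <= 512 characters
      chunk_count: 42        # number value
      internal: true         # boolean value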

@@ -24,7 +24,7 @@ https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `api_base` | `HttpUrl` | No | | Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com) |
+| `base_url` | `HttpUrl \| None` | No | | Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com/openai/v1) |
 | `api_version` | `str \| None` | No | | Azure API version for Azure (e.g., 2024-12-01-preview) |
 | `api_type` | `str \| None` | No | azure | Azure API type for Azure (e.g., azure) |
@@ -32,7 +32,7 @@ https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview
 ```yaml
 api_key: ${env.AZURE_API_KEY:=}
-api_base: ${env.AZURE_API_BASE:=}
+base_url: ${env.AZURE_API_BASE:=}
 api_version: ${env.AZURE_API_VERSION:=}
 api_type: ${env.AZURE_API_TYPE:=}
 ```

@@ -22,6 +22,6 @@ AWS Bedrock inference provider using OpenAI compatible endpoint.
 ## Sample Configuration
 ```yaml
-api_key: ${env.AWS_BEDROCK_API_KEY:=}
+api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
 region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
 ```

@@ -17,11 +17,11 @@ Cerebras inference provider for running models on Cerebras Cloud platform.
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `base_url` | `str` | No | https://api.cerebras.ai | Base URL for the Cerebras API |
+| `base_url` | `HttpUrl \| None` | No | https://api.cerebras.ai/v1 | Base URL for the Cerebras API |
 ## Sample Configuration
 ```yaml
-base_url: https://api.cerebras.ai
+base_url: https://api.cerebras.ai/v1
 api_key: ${env.CEREBRAS_API_KEY:=}
 ```

@@ -17,11 +17,11 @@ Databricks inference provider for running models on Databricks' unified analytic
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_token` | `SecretStr \| None` | No | | The Databricks API token |
-| `url` | `str \| None` | No | | The URL for the Databricks model serving endpoint |
+| `base_url` | `HttpUrl \| None` | No | | The URL for the Databricks model serving endpoint (should include /serving-endpoints path) |
 ## Sample Configuration
 ```yaml
-url: ${env.DATABRICKS_HOST:=}
+base_url: ${env.DATABRICKS_HOST:=}
 api_token: ${env.DATABRICKS_TOKEN:=}
 ```

@@ -17,11 +17,11 @@ Fireworks AI inference provider for Llama models and other AI models on the Fire
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `url` | `str` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks server |
+| `base_url` | `HttpUrl \| None` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks server |
 ## Sample Configuration
 ```yaml
-url: https://api.fireworks.ai/inference/v1
+base_url: https://api.fireworks.ai/inference/v1
 api_key: ${env.FIREWORKS_API_KEY:=}
 ```

@@ -17,11 +17,11 @@ Groq inference provider for ultra-fast inference using Groq's LPU technology.
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `url` | `str` | No | https://api.groq.com | The URL for the Groq AI server |
+| `base_url` | `HttpUrl \| None` | No | https://api.groq.com/openai/v1 | The URL for the Groq AI server |
 ## Sample Configuration
 ```yaml
-url: https://api.groq.com
+base_url: https://api.groq.com/openai/v1
 api_key: ${env.GROQ_API_KEY:=}
 ```

@@ -17,11 +17,11 @@ Llama OpenAI-compatible provider for using Llama models with OpenAI API format.
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `openai_compat_api_base` | `str` | No | https://api.llama.com/compat/v1/ | The URL for the Llama API server |
+| `base_url` | `HttpUrl \| None` | No | https://api.llama.com/compat/v1/ | The URL for the Llama API server |
 ## Sample Configuration
 ```yaml
-openai_compat_api_base: https://api.llama.com/compat/v1/
+base_url: https://api.llama.com/compat/v1/
 api_key: ${env.LLAMA_API_KEY}
 ```

@@ -17,15 +17,13 @@ NVIDIA inference provider for accessing NVIDIA NIM models and AI services.
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `url` | `str` | No | https://integrate.api.nvidia.com | A base url for accessing the NVIDIA NIM |
+| `base_url` | `HttpUrl \| None` | No | https://integrate.api.nvidia.com/v1 | A base url for accessing the NVIDIA NIM |
 | `timeout` | `int` | No | 60 | Timeout for the HTTP requests |
-| `append_api_version` | `bool` | No | True | When set to false, the API version will not be appended to the base_url. By default, it is true. |
 | `rerank_model_to_url` | `dict[str, str]` | No | `{'nv-rerank-qa-mistral-4b:1': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking', 'nvidia/nv-rerankqa-mistral-4b-v3': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking', 'nvidia/llama-3.2-nv-rerankqa-1b-v2': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking'}` | Mapping of rerank model identifiers to their API endpoints. |
 ## Sample Configuration
 ```yaml
-url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
 api_key: ${env.NVIDIA_API_KEY:=}
-append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
 ```

@@ -16,10 +16,10 @@ Ollama inference provider for running local models through the Ollama runtime.
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
-| `url` | `str` | No | http://localhost:11434 | |
+| `base_url` | `HttpUrl \| None` | No | http://localhost:11434/v1 | |
 ## Sample Configuration
 ```yaml
-url: ${env.OLLAMA_URL:=http://localhost:11434}
+base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
 ```

@@ -17,7 +17,7 @@ OpenAI inference provider for accessing GPT models and other OpenAI services.
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `base_url` | `str` | No | https://api.openai.com/v1 | Base URL for OpenAI API |
+| `base_url` | `HttpUrl \| None` | No | https://api.openai.com/v1 | Base URL for OpenAI API |
 ## Sample Configuration

@@ -17,11 +17,11 @@ Passthrough inference provider for connecting to any external inference service
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `url` | `str` | No | | The URL for the passthrough endpoint |
+| `base_url` | `HttpUrl \| None` | No | | The URL for the passthrough endpoint |
 ## Sample Configuration
 ```yaml
-url: ${env.PASSTHROUGH_URL}
+base_url: ${env.PASSTHROUGH_URL}
 api_key: ${env.PASSTHROUGH_API_KEY}
 ```

@@ -17,11 +17,11 @@ RunPod inference provider for running models on RunPod's cloud GPU platform.
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_token` | `SecretStr \| None` | No | | The API token |
-| `url` | `str \| None` | No | | The URL for the Runpod model serving endpoint |
+| `base_url` | `HttpUrl \| None` | No | | The URL for the Runpod model serving endpoint |
 ## Sample Configuration
 ```yaml
-url: ${env.RUNPOD_URL:=}
+base_url: ${env.RUNPOD_URL:=}
 api_token: ${env.RUNPOD_API_TOKEN}
 ```

@@ -17,11 +17,11 @@ SambaNova inference provider for running models on SambaNova's dataflow architec
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `url` | `str` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server |
+| `base_url` | `HttpUrl \| None` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server |
 ## Sample Configuration
 ```yaml
-url: https://api.sambanova.ai/v1
+base_url: https://api.sambanova.ai/v1
 api_key: ${env.SAMBANOVA_API_KEY:=}
 ```

@@ -16,10 +16,10 @@ Text Generation Inference (TGI) provider for HuggingFace model serving.
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
-| `url` | `str` | No | | The URL for the TGI serving endpoint |
+| `base_url` | `HttpUrl \| None` | No | | The URL for the TGI serving endpoint (should include /v1 path) |
 ## Sample Configuration
 ```yaml
-url: ${env.TGI_URL:=}
+base_url: ${env.TGI_URL:=}
 ```

@@ -17,11 +17,11 @@ Together AI inference provider for open-source models and collaborative AI devel
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `url` | `str` | No | https://api.together.xyz/v1 | The URL for the Together AI server |
+| `base_url` | `HttpUrl \| None` | No | https://api.together.xyz/v1 | The URL for the Together AI server |
 ## Sample Configuration
 ```yaml
-url: https://api.together.xyz/v1
+base_url: https://api.together.xyz/v1
 api_key: ${env.TOGETHER_API_KEY:=}
 ```

@@ -17,14 +17,14 @@ Remote vLLM inference provider for connecting to vLLM servers.
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_token` | `SecretStr \| None` | No | | The API token |
-| `url` | `str \| None` | No | | The URL for the vLLM model serving endpoint |
+| `base_url` | `HttpUrl \| None` | No | | The URL for the vLLM model serving endpoint |
 | `max_tokens` | `int` | No | 4096 | Maximum number of tokens to generate. |
 | `tls_verify` | `bool \| str` | No | True | Whether to verify TLS certificates. Can be a boolean or a path to a CA certificate file. |
 ## Sample Configuration
 ```yaml
-url: ${env.VLLM_URL:=}
+base_url: ${env.VLLM_URL:=}
 max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
 api_token: ${env.VLLM_API_TOKEN:=fake}
 tls_verify: ${env.VLLM_TLS_VERIFY:=true}

@@ -17,14 +17,14 @@ IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `url` | `str` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing the watsonx.ai |
+| `base_url` | `HttpUrl \| None` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing the watsonx.ai |
 | `project_id` | `str \| None` | No | | The watsonx.ai project ID |
 | `timeout` | `int` | No | 60 | Timeout for the HTTP requests |
 ## Sample Configuration
 ```yaml
-url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
+base_url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
 api_key: ${env.WATSONX_API_KEY:=}
 project_id: ${env.WATSONX_PROJECT_ID:=}
 ```

docs/package-lock.json (generated)

@@ -10712,12 +10712,6 @@
 "integrity": "sha512-QMUezzXWII9EV5aTFXW1UBVUO77wYPpjqIF8/AviUCThNeSYZykpoTixUeaNNBwmCev0AMDWMAni+f8Hxb1IFw==",
 "license": "Unlicense"
 },
-"node_modules/fs.realpath": {
-"version": "1.0.0",
-"resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz",
-"integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==",
-"license": "ISC"
-},
 "node_modules/fsevents": {
 "version": "2.3.3",
 "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
@@ -10821,21 +10815,20 @@
 "license": "ISC"
 },
 "node_modules/glob": {
-"version": "7.2.3",
+"version": "10.5.0",
-"resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz",
+"resolved": "https://registry.npmjs.org/glob/-/glob-10.5.0.tgz",
-"integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==",
+"integrity": "sha512-DfXN8DfhJ7NH3Oe7cFmu3NCu1wKbkReJ8TorzSAFbSKrlNaQSKfIzqYqVY8zlbs2NLBbWpRiU52GX2PbaBVNkg==",
-"deprecated": "Glob versions prior to v9 are no longer supported",
 "license": "ISC",
 "dependencies": {
-"fs.realpath": "^1.0.0",
+"foreground-child": "^3.1.0",
-"inflight": "^1.0.4",
+"jackspeak": "^3.1.2",
-"inherits": "2",
+"minimatch": "^9.0.4",
-"minimatch": "^3.1.1",
+"minipass": "^7.1.2",
-"once": "^1.3.0",
+"package-json-from-dist": "^1.0.0",
-"path-is-absolute": "^1.0.0"
+"path-scurry": "^1.11.1"
 },
-"engines": {
+"bin": {
-"node": "*"
+"glob": "dist/esm/bin.mjs"
 },
 "funding": {
 "url": "https://github.com/sponsors/isaacs"
@@ -10859,26 +10852,19 @@
 "integrity": "sha512-lkX1HJXwyMcprw/5YUZc2s7DrpAiHB21/V+E1rHUrVNokkvB6bqMzT0VfV6/86ZNabt1k14YOIaT7nDvOX3Iiw==",
 "license": "BSD-2-Clause"
 },
-"node_modules/glob/node_modules/brace-expansion": {
-"version": "1.1.12",
-"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
-"integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
-"license": "MIT",
-"dependencies": {
-"balanced-match": "^1.0.0",
-"concat-map": "0.0.1"
-}
-},
 "node_modules/glob/node_modules/minimatch": {
-"version": "3.1.2",
+"version": "9.0.5",
-"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
+"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz",
-"integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==",
+"integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==",
 "license": "ISC",
 "dependencies": {
-"brace-expansion": "^1.1.7"
+"brace-expansion": "^2.0.1"
 },
 "engines": {
-"node": "*"
+"node": ">=16 || 14 >=14.17"
+},
+"funding": {
+"url": "https://github.com/sponsors/isaacs"
 }
 },
 "node_modules/global-dirs": {
@@ -11792,17 +11778,6 @@
 "node": ">=12"
 }
 },
-"node_modules/inflight": {
-"version": "1.0.6",
-"resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz",
-"integrity": "sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==",
-"deprecated": "This module is not supported, and leaks memory. Do not use it. Check out lru-cache if you want a good and tested way to coalesce async requests by a key value, which is much more comprehensive and powerful.",
-"license": "ISC",
-"dependencies": {
-"once": "^1.3.0",
-"wrappy": "1"
-}
-},
 "node_modules/inherits": {
 "version": "2.0.4",
 "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
@@ -15570,15 +15545,6 @@
 "node": ">= 0.8"
 }
 },
-"node_modules/once": {
-"version": "1.4.0",
-"resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz",
-"integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==",
-"license": "ISC",
-"dependencies": {
-"wrappy": "1"
-}
-},
 "node_modules/onetime": {
 "version": "5.1.2",
 "resolved": "https://registry.npmjs.org/onetime/-/onetime-5.1.2.tgz",
@@ -15955,15 +15921,6 @@
 "node": "^12.20.0 || ^14.13.1 || >=16.0.0"
 }
 },
-"node_modules/path-is-absolute": {
-"version": "1.0.1",
-"resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz",
-"integrity": "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==",
-"license": "MIT",
-"engines": {
-"node": ">=0.10.0"
-}
-},
 "node_modules/path-is-inside": {
 "version": "1.0.2",
 "resolved": "https://registry.npmjs.org/path-is-inside/-/path-is-inside-1.0.2.tgz",
@@ -20038,41 +19995,6 @@
 "node": ">= 6"
 }
 },
-"node_modules/sucrase/node_modules/glob": {
-"version": "10.4.5",
-"resolved": "https://registry.npmjs.org/glob/-/glob-10.4.5.tgz",
-"integrity": "sha512-7Bv8RF0k6xjo7d4A/PxYLbUCfb6c+Vpd2/mB2yRDlew7Jb5hEXiCD9ibfO7wpk8i4sevK6DFny9h7EYbM3/sHg==",
-"license": "ISC",
-"dependencies": {
-"foreground-child": "^3.1.0",
-"jackspeak": "^3.1.2",
-"minimatch": "^9.0.4",
-"minipass": "^7.1.2",
-"package-json-from-dist": "^1.0.0",
-"path-scurry": "^1.11.1"
-},
-"bin": {
-"glob": "dist/esm/bin.mjs"
-},
-"funding": {
-"url": "https://github.com/sponsors/isaacs"
-}
-},
-"node_modules/sucrase/node_modules/minimatch": {
-"version": "9.0.5",
-"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz",
-"integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==",
-"license": "ISC",
-"dependencies": {
-"brace-expansion": "^2.0.1"
-},
-"engines": {
-"node": ">=16 || 14 >=14.17"
-},
-"funding": {
-"url": "https://github.com/sponsors/isaacs"
-}
-},
 "node_modules/supports-color": {
 "version": "7.2.0",
 "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz",
@@ -21620,12 +21542,6 @@
 "url": "https://github.com/chalk/strip-ansi?sponsor=1"
 }
 },
-"node_modules/wrappy": {
-"version": "1.0.2",
-"resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz",
-"integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==",
-"license": "ISC"
-},
 "node_modules/write-file-atomic": {
 "version": "3.0.3",
 "resolved": "https://registry.npmjs.org/write-file-atomic/-/write-file-atomic-3.0.3.tgz",

@@ -31,6 +31,9 @@
 "react-dom": "^19.0.0",
 "remark-code-import": "^1.2.0"
 },
+"overrides": {
+"glob": "^10.5.0"
+},
 "browserslist": {
 "production": [
 ">0.5%",

@@ -6705,9 +6705,21 @@ components:
 title: Object
 default: vector_store.file
 attributes:
-additionalProperties: true
+additionalProperties:
+  anyOf:
+    - type: string
+      maxLength: 512
+    - type: number
+    - type: boolean
+  title: string | number | boolean
+propertyNames:
+  type: string
+  maxLength: 64
 type: object
+maxProperties: 16
 title: Attributes
+description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers.
+x-oaiTypeLabel: map
 chunking_strategy:
 oneOf:
 - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'

@@ -6061,9 +6061,21 @@ components:
 title: Object
 default: vector_store.file
 attributes:
-additionalProperties: true
+additionalProperties:
+  anyOf:
+    - type: string
+      maxLength: 512
+    - type: number
+    - type: boolean
+  title: string | number | boolean
+propertyNames:
+  type: string
+  maxLength: 64
 type: object
+maxProperties: 16
 title: Attributes
+description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers.
+x-oaiTypeLabel: map
 chunking_strategy:
 oneOf:
 - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'

@@ -8883,9 +8883,21 @@ components:
 title: Object
 default: vector_store.file
 attributes:
-additionalProperties: true
+additionalProperties:
+  anyOf:
+    - type: string
+      maxLength: 512
+    - type: number
+    - type: boolean
+  title: string | number | boolean
+propertyNames:
+  type: string
+  maxLength: 64
 type: object
+maxProperties: 16
 title: Attributes
+description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers.
+x-oaiTypeLabel: map
 chunking_strategy:
 oneOf:
 - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'

@@ -9862,9 +9862,21 @@ components:
 title: Object
 default: vector_store.file
 attributes:
-additionalProperties: true
+additionalProperties:
+  anyOf:
+    - type: string
+      maxLength: 512
+    - type: number
+    - type: boolean
+  title: string | number | boolean
+propertyNames:
+  type: string
+  maxLength: 64
 type: object
+maxProperties: 16
 title: Attributes
+description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers.
+x-oaiTypeLabel: map
 chunking_strategy:
 oneOf:
 - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'


@ -287,9 +287,9 @@ start_container() {
# On macOS/Windows, use host.docker.internal to reach host from container # On macOS/Windows, use host.docker.internal to reach host from container
# On Linux with --network host, use localhost # On Linux with --network host, use localhost
if [[ "$(uname)" == "Darwin" ]] || [[ "$(uname)" == *"MINGW"* ]]; then if [[ "$(uname)" == "Darwin" ]] || [[ "$(uname)" == *"MINGW"* ]]; then
OLLAMA_URL="${OLLAMA_URL:-http://host.docker.internal:11434}" OLLAMA_URL="${OLLAMA_URL:-http://host.docker.internal:11434/v1}"
else else
OLLAMA_URL="${OLLAMA_URL:-http://localhost:11434}" OLLAMA_URL="${OLLAMA_URL:-http://localhost:11434/v1}"
fi fi
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OLLAMA_URL=$OLLAMA_URL" DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OLLAMA_URL=$OLLAMA_URL"


@ -16,16 +16,16 @@ import sys
from tests.integration.suites import SETUP_DEFINITIONS, SUITE_DEFINITIONS from tests.integration.suites import SETUP_DEFINITIONS, SUITE_DEFINITIONS
def get_setup_env_vars(setup_name, suite_name=None): def get_setup_config(setup_name, suite_name=None):
""" """
Get environment variables for a setup, with optional suite default fallback. Get full configuration (env vars + defaults) for a setup.
Args: Args:
setup_name: Name of the setup (e.g., 'ollama', 'gpt') setup_name: Name of the setup (e.g., 'ollama', 'gpt')
suite_name: Optional suite name to get default setup if setup_name is None suite_name: Optional suite name to get default setup if setup_name is None
Returns: Returns:
Dictionary of environment variables Dictionary with 'env' and 'defaults' keys
""" """
# If no setup specified, try to get default from suite # If no setup specified, try to get default from suite
if not setup_name and suite_name: if not setup_name and suite_name:
@ -34,7 +34,7 @@ def get_setup_env_vars(setup_name, suite_name=None):
setup_name = suite.default_setup setup_name = suite.default_setup
if not setup_name: if not setup_name:
return {} return {"env": {}, "defaults": {}}
setup = SETUP_DEFINITIONS.get(setup_name) setup = SETUP_DEFINITIONS.get(setup_name)
if not setup: if not setup:
@ -44,27 +44,31 @@ def get_setup_env_vars(setup_name, suite_name=None):
) )
sys.exit(1) sys.exit(1)
return setup.env return {"env": setup.env, "defaults": setup.defaults}
def main(): def main():
parser = argparse.ArgumentParser(description="Extract environment variables from a test setup") parser = argparse.ArgumentParser(description="Extract environment variables and defaults from a test setup")
parser.add_argument("--setup", help="Setup name (e.g., ollama, gpt)") parser.add_argument("--setup", help="Setup name (e.g., ollama, gpt)")
parser.add_argument("--suite", help="Suite name to get default setup from if --setup not provided") parser.add_argument("--suite", help="Suite name to get default setup from if --setup not provided")
parser.add_argument("--format", choices=["bash", "json"], default="bash", help="Output format (default: bash)") parser.add_argument("--format", choices=["bash", "json"], default="bash", help="Output format (default: bash)")
args = parser.parse_args() args = parser.parse_args()
env_vars = get_setup_env_vars(args.setup, args.suite) config = get_setup_config(args.setup, args.suite)
if args.format == "bash": if args.format == "bash":
# Output as bash export statements # Output env vars as bash export statements
for key, value in env_vars.items(): for key, value in config["env"].items():
print(f"export {key}='{value}'") print(f"export {key}='{value}'")
# Output defaults as bash export statements with LLAMA_STACK_TEST_ prefix
for key, value in config["defaults"].items():
env_key = f"LLAMA_STACK_TEST_{key.upper()}"
print(f"export {env_key}='{value}'")
elif args.format == "json": elif args.format == "json":
import json import json
print(json.dumps(env_vars)) print(json.dumps(config))
if __name__ == "__main__": if __name__ == "__main__":
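
A small Python sketch of how the new bash output is assembled from a setup's env and defaults; the OLLAMA_URL value matches the diff above, while the "text_model" default and its value are invented for illustration.

# Mirrors the export loops above; defaults get the LLAMA_STACK_TEST_ prefix.
config = {
    "env": {"OLLAMA_URL": "http://localhost:11434/v1"},
    "defaults": {"text_model": "llama3.2:3b"},  # hypothetical default
}
for key, value in config["env"].items():
    print(f"export {key}='{value}'")
for key, value in config["defaults"].items():
    print(f"export LLAMA_STACK_TEST_{key.upper()}='{value}'")
# export OLLAMA_URL='http://localhost:11434/v1'
# export LLAMA_STACK_TEST_TEXT_MODEL='llama3.2:3b'
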


@ -640,7 +640,7 @@ cmd=( run -d "${PLATFORM_OPTS[@]}" --name llama-stack \
--network llama-net \ --network llama-net \
-p "${PORT}:${PORT}" \ -p "${PORT}:${PORT}" \
"${server_env_opts[@]}" \ "${server_env_opts[@]}" \
-e OLLAMA_URL="http://ollama-server:${OLLAMA_PORT}" \ -e OLLAMA_URL="http://ollama-server:${OLLAMA_PORT}/v1" \
"${SERVER_IMAGE}" --port "${PORT}") "${SERVER_IMAGE}" --port "${PORT}")
log "🦙 Starting Llama Stack..." log "🦙 Starting Llama Stack..."


@ -20,6 +20,7 @@ TEST_PATTERN=""
INFERENCE_MODE="replay" INFERENCE_MODE="replay"
EXTRA_PARAMS="" EXTRA_PARAMS=""
COLLECT_ONLY=false COLLECT_ONLY=false
TYPESCRIPT_ONLY=false
# Function to display usage # Function to display usage
usage() { usage() {
@ -34,6 +35,7 @@ Options:
--subdirs STRING Comma-separated list of test subdirectories to run (overrides suite) --subdirs STRING Comma-separated list of test subdirectories to run (overrides suite)
--pattern STRING Regex pattern to pass to pytest -k --pattern STRING Regex pattern to pass to pytest -k
--collect-only Collect tests only without running them (skips server startup) --collect-only Collect tests only without running them (skips server startup)
--typescript-only Skip Python tests and run only TypeScript client tests
--help Show this help message --help Show this help message
Suites are defined in tests/integration/suites.py and define which tests to run. Suites are defined in tests/integration/suites.py and define which tests to run.
@ -90,6 +92,10 @@ while [[ $# -gt 0 ]]; do
COLLECT_ONLY=true COLLECT_ONLY=true
shift shift
;; ;;
--typescript-only)
TYPESCRIPT_ONLY=true
shift
;;
--help) --help)
usage usage
exit 0 exit 0
@ -181,6 +187,10 @@ echo "$SETUP_ENV"
eval "$SETUP_ENV" eval "$SETUP_ENV"
echo "" echo ""
# Export suite and setup names for TypeScript tests
export LLAMA_STACK_TEST_SUITE="$TEST_SUITE"
export LLAMA_STACK_TEST_SETUP="$TEST_SETUP"
ROOT_DIR="$THIS_DIR/.." ROOT_DIR="$THIS_DIR/.."
cd $ROOT_DIR cd $ROOT_DIR
@ -212,6 +222,71 @@ find_available_port() {
return 1 return 1
} }
run_client_ts_tests() {
if ! command -v npm &>/dev/null; then
echo "npm could not be found; ensure Node.js is installed"
return 1
fi
pushd tests/integration/client-typescript >/dev/null
# Determine if TS_CLIENT_PATH is a directory path or an npm version
if [[ -d "$TS_CLIENT_PATH" ]]; then
# It's a directory path - use local checkout
if [[ ! -f "$TS_CLIENT_PATH/package.json" ]]; then
echo "Error: $TS_CLIENT_PATH exists but doesn't look like llama-stack-client-typescript (no package.json)"
popd >/dev/null
return 1
fi
echo "Using local llama-stack-client-typescript from: $TS_CLIENT_PATH"
# Build the TypeScript client first
echo "Building TypeScript client..."
pushd "$TS_CLIENT_PATH" >/dev/null
npm install --silent
npm run build --silent
popd >/dev/null
# Install other dependencies first
if [[ "${CI:-}" == "true" || "${CI:-}" == "1" ]]; then
npm ci --silent
else
npm install --silent
fi
# Then install the client from local directory
echo "Installing llama-stack-client from: $TS_CLIENT_PATH"
npm install "$TS_CLIENT_PATH" --silent
else
# It's an npm version specifier - install from npm
echo "Installing llama-stack-client@${TS_CLIENT_PATH} from npm"
if [[ "${CI:-}" == "true" || "${CI:-}" == "1" ]]; then
npm ci --silent
npm install "llama-stack-client@${TS_CLIENT_PATH}" --silent
else
npm install "llama-stack-client@${TS_CLIENT_PATH}" --silent
fi
fi
# Verify installation
echo "Verifying llama-stack-client installation..."
if npm list llama-stack-client 2>/dev/null | grep -q llama-stack-client; then
echo "✅ llama-stack-client successfully installed"
npm list llama-stack-client
else
echo "❌ llama-stack-client not found in node_modules"
echo "Installed packages:"
npm list --depth=0
popd >/dev/null
return 1
fi
echo "Running TypeScript tests for suite $TEST_SUITE (setup $TEST_SETUP)"
npm test
popd >/dev/null
}
# Start Llama Stack Server if needed # Start Llama Stack Server if needed
if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then
# Find an available port for the server # Find an available port for the server
@ -221,6 +296,7 @@ if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then
exit 1 exit 1
fi fi
export LLAMA_STACK_PORT export LLAMA_STACK_PORT
export TEST_API_BASE_URL="http://localhost:$LLAMA_STACK_PORT"
echo "Will use port: $LLAMA_STACK_PORT" echo "Will use port: $LLAMA_STACK_PORT"
stop_server() { stop_server() {
@ -298,6 +374,7 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
exit 1 exit 1
fi fi
export LLAMA_STACK_PORT export LLAMA_STACK_PORT
export TEST_API_BASE_URL="http://localhost:$LLAMA_STACK_PORT"
echo "Will use port: $LLAMA_STACK_PORT" echo "Will use port: $LLAMA_STACK_PORT"
echo "=== Building Docker Image for distribution: $DISTRO ===" echo "=== Building Docker Image for distribution: $DISTRO ==="
@ -473,7 +550,9 @@ if [[ -n "$STACK_CONFIG" ]]; then
STACK_CONFIG_ARG="--stack-config=$STACK_CONFIG" STACK_CONFIG_ARG="--stack-config=$STACK_CONFIG"
fi fi
pytest -s -v $PYTEST_TARGET \ # Run Python tests unless typescript-only mode
if [[ "$TYPESCRIPT_ONLY" == "false" ]]; then
pytest -s -v $PYTEST_TARGET \
$STACK_CONFIG_ARG \ $STACK_CONFIG_ARG \
--inference-mode="$INFERENCE_MODE" \ --inference-mode="$INFERENCE_MODE" \
-k "$PYTEST_PATTERN" \ -k "$PYTEST_PATTERN" \
@ -482,7 +561,12 @@ pytest -s -v $PYTEST_TARGET \
--embedding-model=sentence-transformers/nomic-ai/nomic-embed-text-v1.5 \ --embedding-model=sentence-transformers/nomic-ai/nomic-embed-text-v1.5 \
--color=yes $EXTRA_PARAMS \ --color=yes $EXTRA_PARAMS \
--capture=tee-sys --capture=tee-sys
exit_code=$? exit_code=$?
else
echo "Skipping Python tests (--typescript-only mode)"
exit_code=0
fi
set +x set +x
set -e set -e
@ -506,5 +590,10 @@ else
exit 1 exit 1
fi fi
# Run TypeScript client tests if TS_CLIENT_PATH is set
if [[ $exit_code -eq 0 && -n "${TS_CLIENT_PATH:-}" && "${LLAMA_STACK_TEST_STACK_CONFIG_TYPE:-}" == "server" ]]; then
run_client_ts_tests
fi
echo "" echo ""
echo "=== Integration Tests Complete ===" echo "=== Integration Tests Complete ==="


@ -17,44 +17,43 @@ providers:
- provider_id: ${env.CEREBRAS_API_KEY:+cerebras} - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
provider_type: remote::cerebras provider_type: remote::cerebras
config: config:
base_url: https://api.cerebras.ai base_url: https://api.cerebras.ai/v1
api_key: ${env.CEREBRAS_API_KEY:=} api_key: ${env.CEREBRAS_API_KEY:=}
- provider_id: ${env.OLLAMA_URL:+ollama} - provider_id: ${env.OLLAMA_URL:+ollama}
provider_type: remote::ollama provider_type: remote::ollama
config: config:
url: ${env.OLLAMA_URL:=http://localhost:11434} base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
- provider_id: ${env.VLLM_URL:+vllm} - provider_id: ${env.VLLM_URL:+vllm}
provider_type: remote::vllm provider_type: remote::vllm
config: config:
url: ${env.VLLM_URL:=} base_url: ${env.VLLM_URL:=}
max_tokens: ${env.VLLM_MAX_TOKENS:=4096} max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:=fake} api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:=true} tls_verify: ${env.VLLM_TLS_VERIFY:=true}
- provider_id: ${env.TGI_URL:+tgi} - provider_id: ${env.TGI_URL:+tgi}
provider_type: remote::tgi provider_type: remote::tgi
config: config:
url: ${env.TGI_URL:=} base_url: ${env.TGI_URL:=}
- provider_id: fireworks - provider_id: fireworks
provider_type: remote::fireworks provider_type: remote::fireworks
config: config:
url: https://api.fireworks.ai/inference/v1 base_url: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY:=} api_key: ${env.FIREWORKS_API_KEY:=}
- provider_id: together - provider_id: together
provider_type: remote::together provider_type: remote::together
config: config:
url: https://api.together.xyz/v1 base_url: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY:=} api_key: ${env.TOGETHER_API_KEY:=}
- provider_id: bedrock - provider_id: bedrock
provider_type: remote::bedrock provider_type: remote::bedrock
config: config:
api_key: ${env.AWS_BEDROCK_API_KEY:=} api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
- provider_id: ${env.NVIDIA_API_KEY:+nvidia} - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
provider_type: remote::nvidia provider_type: remote::nvidia
config: config:
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
api_key: ${env.NVIDIA_API_KEY:=} api_key: ${env.NVIDIA_API_KEY:=}
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
- provider_id: openai - provider_id: openai
provider_type: remote::openai provider_type: remote::openai
config: config:
@ -76,18 +75,18 @@ providers:
- provider_id: groq - provider_id: groq
provider_type: remote::groq provider_type: remote::groq
config: config:
url: https://api.groq.com base_url: https://api.groq.com/openai/v1
api_key: ${env.GROQ_API_KEY:=} api_key: ${env.GROQ_API_KEY:=}
- provider_id: sambanova - provider_id: sambanova
provider_type: remote::sambanova provider_type: remote::sambanova
config: config:
url: https://api.sambanova.ai/v1 base_url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY:=} api_key: ${env.SAMBANOVA_API_KEY:=}
- provider_id: ${env.AZURE_API_KEY:+azure} - provider_id: ${env.AZURE_API_KEY:+azure}
provider_type: remote::azure provider_type: remote::azure
config: config:
api_key: ${env.AZURE_API_KEY:=} api_key: ${env.AZURE_API_KEY:=}
api_base: ${env.AZURE_API_BASE:=} base_url: ${env.AZURE_API_BASE:=}
api_version: ${env.AZURE_API_VERSION:=} api_version: ${env.AZURE_API_VERSION:=}
api_type: ${env.AZURE_API_TYPE:=} api_type: ${env.AZURE_API_TYPE:=}
- provider_id: sentence-transformers - provider_id: sentence-transformers


@ -17,44 +17,43 @@ providers:
- provider_id: ${env.CEREBRAS_API_KEY:+cerebras} - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
provider_type: remote::cerebras provider_type: remote::cerebras
config: config:
base_url: https://api.cerebras.ai base_url: https://api.cerebras.ai/v1
api_key: ${env.CEREBRAS_API_KEY:=} api_key: ${env.CEREBRAS_API_KEY:=}
- provider_id: ${env.OLLAMA_URL:+ollama} - provider_id: ${env.OLLAMA_URL:+ollama}
provider_type: remote::ollama provider_type: remote::ollama
config: config:
url: ${env.OLLAMA_URL:=http://localhost:11434} base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
- provider_id: ${env.VLLM_URL:+vllm} - provider_id: ${env.VLLM_URL:+vllm}
provider_type: remote::vllm provider_type: remote::vllm
config: config:
url: ${env.VLLM_URL:=} base_url: ${env.VLLM_URL:=}
max_tokens: ${env.VLLM_MAX_TOKENS:=4096} max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:=fake} api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:=true} tls_verify: ${env.VLLM_TLS_VERIFY:=true}
- provider_id: ${env.TGI_URL:+tgi} - provider_id: ${env.TGI_URL:+tgi}
provider_type: remote::tgi provider_type: remote::tgi
config: config:
url: ${env.TGI_URL:=} base_url: ${env.TGI_URL:=}
- provider_id: fireworks - provider_id: fireworks
provider_type: remote::fireworks provider_type: remote::fireworks
config: config:
url: https://api.fireworks.ai/inference/v1 base_url: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY:=} api_key: ${env.FIREWORKS_API_KEY:=}
- provider_id: together - provider_id: together
provider_type: remote::together provider_type: remote::together
config: config:
url: https://api.together.xyz/v1 base_url: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY:=} api_key: ${env.TOGETHER_API_KEY:=}
- provider_id: bedrock - provider_id: bedrock
provider_type: remote::bedrock provider_type: remote::bedrock
config: config:
api_key: ${env.AWS_BEDROCK_API_KEY:=} api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
- provider_id: ${env.NVIDIA_API_KEY:+nvidia} - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
provider_type: remote::nvidia provider_type: remote::nvidia
config: config:
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
api_key: ${env.NVIDIA_API_KEY:=} api_key: ${env.NVIDIA_API_KEY:=}
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
- provider_id: openai - provider_id: openai
provider_type: remote::openai provider_type: remote::openai
config: config:
@ -76,18 +75,18 @@ providers:
- provider_id: groq - provider_id: groq
provider_type: remote::groq provider_type: remote::groq
config: config:
url: https://api.groq.com base_url: https://api.groq.com/openai/v1
api_key: ${env.GROQ_API_KEY:=} api_key: ${env.GROQ_API_KEY:=}
- provider_id: sambanova - provider_id: sambanova
provider_type: remote::sambanova provider_type: remote::sambanova
config: config:
url: https://api.sambanova.ai/v1 base_url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY:=} api_key: ${env.SAMBANOVA_API_KEY:=}
- provider_id: ${env.AZURE_API_KEY:+azure} - provider_id: ${env.AZURE_API_KEY:+azure}
provider_type: remote::azure provider_type: remote::azure
config: config:
api_key: ${env.AZURE_API_KEY:=} api_key: ${env.AZURE_API_KEY:=}
api_base: ${env.AZURE_API_BASE:=} base_url: ${env.AZURE_API_BASE:=}
api_version: ${env.AZURE_API_VERSION:=} api_version: ${env.AZURE_API_VERSION:=}
api_type: ${env.AZURE_API_TYPE:=} api_type: ${env.AZURE_API_TYPE:=}
- provider_id: sentence-transformers - provider_id: sentence-transformers


@ -16,9 +16,8 @@ providers:
- provider_id: nvidia - provider_id: nvidia
provider_type: remote::nvidia provider_type: remote::nvidia
config: config:
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
api_key: ${env.NVIDIA_API_KEY:=} api_key: ${env.NVIDIA_API_KEY:=}
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
- provider_id: nvidia - provider_id: nvidia
provider_type: remote::nvidia provider_type: remote::nvidia
config: config:


@ -16,9 +16,8 @@ providers:
- provider_id: nvidia - provider_id: nvidia
provider_type: remote::nvidia provider_type: remote::nvidia
config: config:
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
api_key: ${env.NVIDIA_API_KEY:=} api_key: ${env.NVIDIA_API_KEY:=}
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
vector_io: vector_io:
- provider_id: faiss - provider_id: faiss
provider_type: inline::faiss provider_type: inline::faiss


@ -27,12 +27,12 @@ providers:
- provider_id: groq - provider_id: groq
provider_type: remote::groq provider_type: remote::groq
config: config:
url: https://api.groq.com base_url: https://api.groq.com/openai/v1
api_key: ${env.GROQ_API_KEY:=} api_key: ${env.GROQ_API_KEY:=}
- provider_id: together - provider_id: together
provider_type: remote::together provider_type: remote::together
config: config:
url: https://api.together.xyz/v1 base_url: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY:=} api_key: ${env.TOGETHER_API_KEY:=}
vector_io: vector_io:
- provider_id: sqlite-vec - provider_id: sqlite-vec


@ -11,7 +11,7 @@ providers:
- provider_id: vllm-inference - provider_id: vllm-inference
provider_type: remote::vllm provider_type: remote::vllm
config: config:
url: ${env.VLLM_URL:=http://localhost:8000/v1} base_url: ${env.VLLM_URL:=}
max_tokens: ${env.VLLM_MAX_TOKENS:=4096} max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:=fake} api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:=true} tls_verify: ${env.VLLM_TLS_VERIFY:=true}


@ -17,44 +17,43 @@ providers:
- provider_id: ${env.CEREBRAS_API_KEY:+cerebras} - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
provider_type: remote::cerebras provider_type: remote::cerebras
config: config:
base_url: https://api.cerebras.ai base_url: https://api.cerebras.ai/v1
api_key: ${env.CEREBRAS_API_KEY:=} api_key: ${env.CEREBRAS_API_KEY:=}
- provider_id: ${env.OLLAMA_URL:+ollama} - provider_id: ${env.OLLAMA_URL:+ollama}
provider_type: remote::ollama provider_type: remote::ollama
config: config:
url: ${env.OLLAMA_URL:=http://localhost:11434} base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
- provider_id: ${env.VLLM_URL:+vllm} - provider_id: ${env.VLLM_URL:+vllm}
provider_type: remote::vllm provider_type: remote::vllm
config: config:
url: ${env.VLLM_URL:=} base_url: ${env.VLLM_URL:=}
max_tokens: ${env.VLLM_MAX_TOKENS:=4096} max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:=fake} api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:=true} tls_verify: ${env.VLLM_TLS_VERIFY:=true}
- provider_id: ${env.TGI_URL:+tgi} - provider_id: ${env.TGI_URL:+tgi}
provider_type: remote::tgi provider_type: remote::tgi
config: config:
url: ${env.TGI_URL:=} base_url: ${env.TGI_URL:=}
- provider_id: fireworks - provider_id: fireworks
provider_type: remote::fireworks provider_type: remote::fireworks
config: config:
url: https://api.fireworks.ai/inference/v1 base_url: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY:=} api_key: ${env.FIREWORKS_API_KEY:=}
- provider_id: together - provider_id: together
provider_type: remote::together provider_type: remote::together
config: config:
url: https://api.together.xyz/v1 base_url: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY:=} api_key: ${env.TOGETHER_API_KEY:=}
- provider_id: bedrock - provider_id: bedrock
provider_type: remote::bedrock provider_type: remote::bedrock
config: config:
api_key: ${env.AWS_BEDROCK_API_KEY:=} api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
- provider_id: ${env.NVIDIA_API_KEY:+nvidia} - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
provider_type: remote::nvidia provider_type: remote::nvidia
config: config:
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
api_key: ${env.NVIDIA_API_KEY:=} api_key: ${env.NVIDIA_API_KEY:=}
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
- provider_id: openai - provider_id: openai
provider_type: remote::openai provider_type: remote::openai
config: config:
@ -76,18 +75,18 @@ providers:
- provider_id: groq - provider_id: groq
provider_type: remote::groq provider_type: remote::groq
config: config:
url: https://api.groq.com base_url: https://api.groq.com/openai/v1
api_key: ${env.GROQ_API_KEY:=} api_key: ${env.GROQ_API_KEY:=}
- provider_id: sambanova - provider_id: sambanova
provider_type: remote::sambanova provider_type: remote::sambanova
config: config:
url: https://api.sambanova.ai/v1 base_url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY:=} api_key: ${env.SAMBANOVA_API_KEY:=}
- provider_id: ${env.AZURE_API_KEY:+azure} - provider_id: ${env.AZURE_API_KEY:+azure}
provider_type: remote::azure provider_type: remote::azure
config: config:
api_key: ${env.AZURE_API_KEY:=} api_key: ${env.AZURE_API_KEY:=}
api_base: ${env.AZURE_API_BASE:=} base_url: ${env.AZURE_API_BASE:=}
api_version: ${env.AZURE_API_VERSION:=} api_version: ${env.AZURE_API_VERSION:=}
api_type: ${env.AZURE_API_TYPE:=} api_type: ${env.AZURE_API_TYPE:=}
- provider_id: sentence-transformers - provider_id: sentence-transformers


@ -17,44 +17,43 @@ providers:
- provider_id: ${env.CEREBRAS_API_KEY:+cerebras} - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
provider_type: remote::cerebras provider_type: remote::cerebras
config: config:
base_url: https://api.cerebras.ai base_url: https://api.cerebras.ai/v1
api_key: ${env.CEREBRAS_API_KEY:=} api_key: ${env.CEREBRAS_API_KEY:=}
- provider_id: ${env.OLLAMA_URL:+ollama} - provider_id: ${env.OLLAMA_URL:+ollama}
provider_type: remote::ollama provider_type: remote::ollama
config: config:
url: ${env.OLLAMA_URL:=http://localhost:11434} base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
- provider_id: ${env.VLLM_URL:+vllm} - provider_id: ${env.VLLM_URL:+vllm}
provider_type: remote::vllm provider_type: remote::vllm
config: config:
url: ${env.VLLM_URL:=} base_url: ${env.VLLM_URL:=}
max_tokens: ${env.VLLM_MAX_TOKENS:=4096} max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:=fake} api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:=true} tls_verify: ${env.VLLM_TLS_VERIFY:=true}
- provider_id: ${env.TGI_URL:+tgi} - provider_id: ${env.TGI_URL:+tgi}
provider_type: remote::tgi provider_type: remote::tgi
config: config:
url: ${env.TGI_URL:=} base_url: ${env.TGI_URL:=}
- provider_id: fireworks - provider_id: fireworks
provider_type: remote::fireworks provider_type: remote::fireworks
config: config:
url: https://api.fireworks.ai/inference/v1 base_url: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY:=} api_key: ${env.FIREWORKS_API_KEY:=}
- provider_id: together - provider_id: together
provider_type: remote::together provider_type: remote::together
config: config:
url: https://api.together.xyz/v1 base_url: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY:=} api_key: ${env.TOGETHER_API_KEY:=}
- provider_id: bedrock - provider_id: bedrock
provider_type: remote::bedrock provider_type: remote::bedrock
config: config:
api_key: ${env.AWS_BEDROCK_API_KEY:=} api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
- provider_id: ${env.NVIDIA_API_KEY:+nvidia} - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
provider_type: remote::nvidia provider_type: remote::nvidia
config: config:
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
api_key: ${env.NVIDIA_API_KEY:=} api_key: ${env.NVIDIA_API_KEY:=}
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
- provider_id: openai - provider_id: openai
provider_type: remote::openai provider_type: remote::openai
config: config:
@ -76,18 +75,18 @@ providers:
- provider_id: groq - provider_id: groq
provider_type: remote::groq provider_type: remote::groq
config: config:
url: https://api.groq.com base_url: https://api.groq.com/openai/v1
api_key: ${env.GROQ_API_KEY:=} api_key: ${env.GROQ_API_KEY:=}
- provider_id: sambanova - provider_id: sambanova
provider_type: remote::sambanova provider_type: remote::sambanova
config: config:
url: https://api.sambanova.ai/v1 base_url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY:=} api_key: ${env.SAMBANOVA_API_KEY:=}
- provider_id: ${env.AZURE_API_KEY:+azure} - provider_id: ${env.AZURE_API_KEY:+azure}
provider_type: remote::azure provider_type: remote::azure
config: config:
api_key: ${env.AZURE_API_KEY:=} api_key: ${env.AZURE_API_KEY:=}
api_base: ${env.AZURE_API_BASE:=} base_url: ${env.AZURE_API_BASE:=}
api_version: ${env.AZURE_API_VERSION:=} api_version: ${env.AZURE_API_VERSION:=}
api_type: ${env.AZURE_API_TYPE:=} api_type: ${env.AZURE_API_TYPE:=}
- provider_id: sentence-transformers - provider_id: sentence-transformers


@ -17,44 +17,43 @@ providers:
- provider_id: ${env.CEREBRAS_API_KEY:+cerebras} - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
provider_type: remote::cerebras provider_type: remote::cerebras
config: config:
base_url: https://api.cerebras.ai base_url: https://api.cerebras.ai/v1
api_key: ${env.CEREBRAS_API_KEY:=} api_key: ${env.CEREBRAS_API_KEY:=}
- provider_id: ${env.OLLAMA_URL:+ollama} - provider_id: ${env.OLLAMA_URL:+ollama}
provider_type: remote::ollama provider_type: remote::ollama
config: config:
url: ${env.OLLAMA_URL:=http://localhost:11434} base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
- provider_id: ${env.VLLM_URL:+vllm} - provider_id: ${env.VLLM_URL:+vllm}
provider_type: remote::vllm provider_type: remote::vllm
config: config:
url: ${env.VLLM_URL:=} base_url: ${env.VLLM_URL:=}
max_tokens: ${env.VLLM_MAX_TOKENS:=4096} max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:=fake} api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:=true} tls_verify: ${env.VLLM_TLS_VERIFY:=true}
- provider_id: ${env.TGI_URL:+tgi} - provider_id: ${env.TGI_URL:+tgi}
provider_type: remote::tgi provider_type: remote::tgi
config: config:
url: ${env.TGI_URL:=} base_url: ${env.TGI_URL:=}
- provider_id: fireworks - provider_id: fireworks
provider_type: remote::fireworks provider_type: remote::fireworks
config: config:
url: https://api.fireworks.ai/inference/v1 base_url: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY:=} api_key: ${env.FIREWORKS_API_KEY:=}
- provider_id: together - provider_id: together
provider_type: remote::together provider_type: remote::together
config: config:
url: https://api.together.xyz/v1 base_url: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY:=} api_key: ${env.TOGETHER_API_KEY:=}
- provider_id: bedrock - provider_id: bedrock
provider_type: remote::bedrock provider_type: remote::bedrock
config: config:
api_key: ${env.AWS_BEDROCK_API_KEY:=} api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
- provider_id: ${env.NVIDIA_API_KEY:+nvidia} - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
provider_type: remote::nvidia provider_type: remote::nvidia
config: config:
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
api_key: ${env.NVIDIA_API_KEY:=} api_key: ${env.NVIDIA_API_KEY:=}
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
- provider_id: openai - provider_id: openai
provider_type: remote::openai provider_type: remote::openai
config: config:
@ -76,18 +75,18 @@ providers:
- provider_id: groq - provider_id: groq
provider_type: remote::groq provider_type: remote::groq
config: config:
url: https://api.groq.com base_url: https://api.groq.com/openai/v1
api_key: ${env.GROQ_API_KEY:=} api_key: ${env.GROQ_API_KEY:=}
- provider_id: sambanova - provider_id: sambanova
provider_type: remote::sambanova provider_type: remote::sambanova
config: config:
url: https://api.sambanova.ai/v1 base_url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY:=} api_key: ${env.SAMBANOVA_API_KEY:=}
- provider_id: ${env.AZURE_API_KEY:+azure} - provider_id: ${env.AZURE_API_KEY:+azure}
provider_type: remote::azure provider_type: remote::azure
config: config:
api_key: ${env.AZURE_API_KEY:=} api_key: ${env.AZURE_API_KEY:=}
api_base: ${env.AZURE_API_BASE:=} base_url: ${env.AZURE_API_BASE:=}
api_version: ${env.AZURE_API_VERSION:=} api_version: ${env.AZURE_API_VERSION:=}
api_type: ${env.AZURE_API_TYPE:=} api_type: ${env.AZURE_API_TYPE:=}
- provider_id: sentence-transformers - provider_id: sentence-transformers


@ -17,44 +17,43 @@ providers:
- provider_id: ${env.CEREBRAS_API_KEY:+cerebras} - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
provider_type: remote::cerebras provider_type: remote::cerebras
config: config:
base_url: https://api.cerebras.ai base_url: https://api.cerebras.ai/v1
api_key: ${env.CEREBRAS_API_KEY:=} api_key: ${env.CEREBRAS_API_KEY:=}
- provider_id: ${env.OLLAMA_URL:+ollama} - provider_id: ${env.OLLAMA_URL:+ollama}
provider_type: remote::ollama provider_type: remote::ollama
config: config:
url: ${env.OLLAMA_URL:=http://localhost:11434} base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
- provider_id: ${env.VLLM_URL:+vllm} - provider_id: ${env.VLLM_URL:+vllm}
provider_type: remote::vllm provider_type: remote::vllm
config: config:
url: ${env.VLLM_URL:=} base_url: ${env.VLLM_URL:=}
max_tokens: ${env.VLLM_MAX_TOKENS:=4096} max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:=fake} api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:=true} tls_verify: ${env.VLLM_TLS_VERIFY:=true}
- provider_id: ${env.TGI_URL:+tgi} - provider_id: ${env.TGI_URL:+tgi}
provider_type: remote::tgi provider_type: remote::tgi
config: config:
url: ${env.TGI_URL:=} base_url: ${env.TGI_URL:=}
- provider_id: fireworks - provider_id: fireworks
provider_type: remote::fireworks provider_type: remote::fireworks
config: config:
url: https://api.fireworks.ai/inference/v1 base_url: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY:=} api_key: ${env.FIREWORKS_API_KEY:=}
- provider_id: together - provider_id: together
provider_type: remote::together provider_type: remote::together
config: config:
url: https://api.together.xyz/v1 base_url: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY:=} api_key: ${env.TOGETHER_API_KEY:=}
- provider_id: bedrock - provider_id: bedrock
provider_type: remote::bedrock provider_type: remote::bedrock
config: config:
api_key: ${env.AWS_BEDROCK_API_KEY:=} api_key: ${env.AWS_BEARER_TOKEN_BEDROCK:=}
region_name: ${env.AWS_DEFAULT_REGION:=us-east-2} region_name: ${env.AWS_DEFAULT_REGION:=us-east-2}
- provider_id: ${env.NVIDIA_API_KEY:+nvidia} - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
provider_type: remote::nvidia provider_type: remote::nvidia
config: config:
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
api_key: ${env.NVIDIA_API_KEY:=} api_key: ${env.NVIDIA_API_KEY:=}
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
- provider_id: openai - provider_id: openai
provider_type: remote::openai provider_type: remote::openai
config: config:
@ -76,18 +75,18 @@ providers:
- provider_id: groq - provider_id: groq
provider_type: remote::groq provider_type: remote::groq
config: config:
url: https://api.groq.com base_url: https://api.groq.com/openai/v1
api_key: ${env.GROQ_API_KEY:=} api_key: ${env.GROQ_API_KEY:=}
- provider_id: sambanova - provider_id: sambanova
provider_type: remote::sambanova provider_type: remote::sambanova
config: config:
url: https://api.sambanova.ai/v1 base_url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY:=} api_key: ${env.SAMBANOVA_API_KEY:=}
- provider_id: ${env.AZURE_API_KEY:+azure} - provider_id: ${env.AZURE_API_KEY:+azure}
provider_type: remote::azure provider_type: remote::azure
config: config:
api_key: ${env.AZURE_API_KEY:=} api_key: ${env.AZURE_API_KEY:=}
api_base: ${env.AZURE_API_BASE:=} base_url: ${env.AZURE_API_BASE:=}
api_version: ${env.AZURE_API_VERSION:=} api_version: ${env.AZURE_API_VERSION:=}
api_type: ${env.AZURE_API_TYPE:=} api_type: ${env.AZURE_API_TYPE:=}
- provider_id: sentence-transformers - provider_id: sentence-transformers


@ -15,7 +15,7 @@ providers:
- provider_id: watsonx - provider_id: watsonx
provider_type: remote::watsonx provider_type: remote::watsonx
config: config:
url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com} base_url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
api_key: ${env.WATSONX_API_KEY:=} api_key: ${env.WATSONX_API_KEY:=}
project_id: ${env.WATSONX_PROJECT_ID:=} project_id: ${env.WATSONX_PROJECT_ID:=}
vector_io: vector_io:


@ -23,12 +23,14 @@ async def get_provider_impl(
config, config,
deps[Api.inference], deps[Api.inference],
deps[Api.vector_io], deps[Api.vector_io],
deps[Api.safety], deps.get(Api.safety),
deps[Api.tool_runtime], deps[Api.tool_runtime],
deps[Api.tool_groups], deps[Api.tool_groups],
deps[Api.conversations], deps[Api.conversations],
policy, deps[Api.prompts],
deps[Api.files],
telemetry_enabled, telemetry_enabled,
policy,
) )
await impl.initialize() await impl.initialize()
return impl return impl


@ -12,6 +12,7 @@ from llama_stack.providers.utils.responses.responses_store import ResponsesStore
from llama_stack_api import ( from llama_stack_api import (
Agents, Agents,
Conversations, Conversations,
Files,
Inference, Inference,
ListOpenAIResponseInputItem, ListOpenAIResponseInputItem,
ListOpenAIResponseObject, ListOpenAIResponseObject,
@ -22,6 +23,7 @@ from llama_stack_api import (
OpenAIResponsePrompt, OpenAIResponsePrompt,
OpenAIResponseText, OpenAIResponseText,
Order, Order,
Prompts,
ResponseGuardrail, ResponseGuardrail,
Safety, Safety,
ToolGroups, ToolGroups,
@ -41,10 +43,12 @@ class MetaReferenceAgentsImpl(Agents):
config: MetaReferenceAgentsImplConfig, config: MetaReferenceAgentsImplConfig,
inference_api: Inference, inference_api: Inference,
vector_io_api: VectorIO, vector_io_api: VectorIO,
safety_api: Safety, safety_api: Safety | None,
tool_runtime_api: ToolRuntime, tool_runtime_api: ToolRuntime,
tool_groups_api: ToolGroups, tool_groups_api: ToolGroups,
conversations_api: Conversations, conversations_api: Conversations,
prompts_api: Prompts,
files_api: Files,
policy: list[AccessRule], policy: list[AccessRule],
telemetry_enabled: bool = False, telemetry_enabled: bool = False,
): ):
@ -56,7 +60,8 @@ class MetaReferenceAgentsImpl(Agents):
self.tool_groups_api = tool_groups_api self.tool_groups_api = tool_groups_api
self.conversations_api = conversations_api self.conversations_api = conversations_api
self.telemetry_enabled = telemetry_enabled self.telemetry_enabled = telemetry_enabled
self.prompts_api = prompts_api
self.files_api = files_api
self.in_memory_store = InmemoryKVStoreImpl() self.in_memory_store = InmemoryKVStoreImpl()
self.openai_responses_impl: OpenAIResponsesImpl | None = None self.openai_responses_impl: OpenAIResponsesImpl | None = None
self.policy = policy self.policy = policy
@ -73,6 +78,8 @@ class MetaReferenceAgentsImpl(Agents):
vector_io_api=self.vector_io_api, vector_io_api=self.vector_io_api,
safety_api=self.safety_api, safety_api=self.safety_api,
conversations_api=self.conversations_api, conversations_api=self.conversations_api,
prompts_api=self.prompts_api,
files_api=self.files_api,
) )
async def shutdown(self) -> None: async def shutdown(self) -> None:


@ -4,6 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import re
import time import time
import uuid import uuid
from collections.abc import AsyncIterator from collections.abc import AsyncIterator
@ -18,13 +19,17 @@ from llama_stack.providers.utils.responses.responses_store import (
from llama_stack_api import ( from llama_stack_api import (
ConversationItem, ConversationItem,
Conversations, Conversations,
Files,
Inference, Inference,
InvalidConversationIdError, InvalidConversationIdError,
ListOpenAIResponseInputItem, ListOpenAIResponseInputItem,
ListOpenAIResponseObject, ListOpenAIResponseObject,
OpenAIChatCompletionContentPartParam,
OpenAIDeleteResponseObject, OpenAIDeleteResponseObject,
OpenAIMessageParam, OpenAIMessageParam,
OpenAIResponseInput, OpenAIResponseInput,
OpenAIResponseInputMessageContentFile,
OpenAIResponseInputMessageContentImage,
OpenAIResponseInputMessageContentText, OpenAIResponseInputMessageContentText,
OpenAIResponseInputTool, OpenAIResponseInputTool,
OpenAIResponseMessage, OpenAIResponseMessage,
@ -34,7 +39,9 @@ from llama_stack_api import (
OpenAIResponseText, OpenAIResponseText,
OpenAIResponseTextFormat, OpenAIResponseTextFormat,
OpenAISystemMessageParam, OpenAISystemMessageParam,
OpenAIUserMessageParam,
Order, Order,
Prompts,
ResponseGuardrailSpec, ResponseGuardrailSpec,
Safety, Safety,
ToolGroups, ToolGroups,
@ -46,6 +53,7 @@ from .streaming import StreamingResponseOrchestrator
from .tool_executor import ToolExecutor from .tool_executor import ToolExecutor
from .types import ChatCompletionContext, ToolContext from .types import ChatCompletionContext, ToolContext
from .utils import ( from .utils import (
convert_response_content_to_chat_content,
convert_response_input_to_chat_messages, convert_response_input_to_chat_messages,
convert_response_text_to_chat_response_format, convert_response_text_to_chat_response_format,
extract_guardrail_ids, extract_guardrail_ids,
@ -67,8 +75,10 @@ class OpenAIResponsesImpl:
tool_runtime_api: ToolRuntime, tool_runtime_api: ToolRuntime,
responses_store: ResponsesStore, responses_store: ResponsesStore,
vector_io_api: VectorIO, # VectorIO vector_io_api: VectorIO, # VectorIO
safety_api: Safety, safety_api: Safety | None,
conversations_api: Conversations, conversations_api: Conversations,
prompts_api: Prompts,
files_api: Files,
): ):
self.inference_api = inference_api self.inference_api = inference_api
self.tool_groups_api = tool_groups_api self.tool_groups_api = tool_groups_api
@ -82,6 +92,8 @@ class OpenAIResponsesImpl:
tool_runtime_api=tool_runtime_api, tool_runtime_api=tool_runtime_api,
vector_io_api=vector_io_api, vector_io_api=vector_io_api,
) )
self.prompts_api = prompts_api
self.files_api = files_api
async def _prepend_previous_response( async def _prepend_previous_response(
self, self,
@ -122,11 +134,13 @@ class OpenAIResponsesImpl:
# Use stored messages directly and convert only new input # Use stored messages directly and convert only new input
message_adapter = TypeAdapter(list[OpenAIMessageParam]) message_adapter = TypeAdapter(list[OpenAIMessageParam])
messages = message_adapter.validate_python(previous_response.messages) messages = message_adapter.validate_python(previous_response.messages)
new_messages = await convert_response_input_to_chat_messages(input, previous_messages=messages) new_messages = await convert_response_input_to_chat_messages(
input, previous_messages=messages, files_api=self.files_api
)
messages.extend(new_messages) messages.extend(new_messages)
else: else:
# Backward compatibility: reconstruct from inputs # Backward compatibility: reconstruct from inputs
messages = await convert_response_input_to_chat_messages(all_input) messages = await convert_response_input_to_chat_messages(all_input, files_api=self.files_api)
tool_context.recover_tools_from_previous_response(previous_response) tool_context.recover_tools_from_previous_response(previous_response)
elif conversation is not None: elif conversation is not None:
@ -138,7 +152,7 @@ class OpenAIResponsesImpl:
all_input = input all_input = input
if not conversation_items.data: if not conversation_items.data:
# First turn - just convert the new input # First turn - just convert the new input
messages = await convert_response_input_to_chat_messages(input) messages = await convert_response_input_to_chat_messages(input, files_api=self.files_api)
else: else:
if not stored_messages: if not stored_messages:
all_input = conversation_items.data all_input = conversation_items.data
@ -154,14 +168,82 @@ class OpenAIResponsesImpl:
all_input = input all_input = input
messages = stored_messages or [] messages = stored_messages or []
new_messages = await convert_response_input_to_chat_messages(all_input, previous_messages=messages) new_messages = await convert_response_input_to_chat_messages(
all_input, previous_messages=messages, files_api=self.files_api
)
messages.extend(new_messages) messages.extend(new_messages)
else: else:
all_input = input all_input = input
messages = await convert_response_input_to_chat_messages(all_input) messages = await convert_response_input_to_chat_messages(all_input, files_api=self.files_api)
return all_input, messages, tool_context return all_input, messages, tool_context
async def _prepend_prompt(
self,
messages: list[OpenAIMessageParam],
openai_response_prompt: OpenAIResponsePrompt | None,
) -> None:
"""Prepend prompt template to messages, resolving text/image/file variables.
:param messages: List of OpenAIMessageParam objects
:param openai_response_prompt: (Optional) OpenAIResponsePrompt object with variables
:returns: None; the resolved prompt is prepended to messages in place
"""
if not openai_response_prompt or not openai_response_prompt.id:
return
prompt_version = int(openai_response_prompt.version) if openai_response_prompt.version else None
cur_prompt = await self.prompts_api.get_prompt(openai_response_prompt.id, prompt_version)
if not cur_prompt or not cur_prompt.prompt:
return
cur_prompt_text = cur_prompt.prompt
cur_prompt_variables = cur_prompt.variables
if not openai_response_prompt.variables:
messages.insert(0, OpenAISystemMessageParam(content=cur_prompt_text))
return
# Validate that all provided variables exist in the prompt
for name in openai_response_prompt.variables.keys():
if name not in cur_prompt_variables:
raise ValueError(f"Variable {name} not found in prompt {openai_response_prompt.id}")
# Separate text and media variables
text_substitutions = {}
media_content_parts: list[OpenAIChatCompletionContentPartParam] = []
for name, value in openai_response_prompt.variables.items():
# Text variable found
if isinstance(value, OpenAIResponseInputMessageContentText):
text_substitutions[name] = value.text
# Media variable found
elif isinstance(value, OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile):
converted_parts = await convert_response_content_to_chat_content([value], files_api=self.files_api)
if isinstance(converted_parts, list):
media_content_parts.extend(converted_parts)
# Eg: {{product_photo}} becomes "[Image: product_photo]"
# This gives the model textual context about what media exists in the prompt
var_type = value.type.replace("input_", "").replace("_", " ").title()
text_substitutions[name] = f"[{var_type}: {name}]"
def replace_variable(match: re.Match[str]) -> str:
var_name = match.group(1).strip()
return str(text_substitutions.get(var_name, match.group(0)))
pattern = r"\{\{\s*(\w+)\s*\}\}"
processed_prompt_text = re.sub(pattern, replace_variable, cur_prompt_text)
# Insert system message with resolved text
messages.insert(0, OpenAISystemMessageParam(content=processed_prompt_text))
# If we have media, append a new user message so the model can ingest the images and files
if media_content_parts:
messages.append(OpenAIUserMessageParam(content=media_content_parts))
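
A self-contained sketch of the {{variable}} substitution performed above; the template text and variable values are invented for illustration.

import re

cur_prompt_text = "Describe {{product_photo}} for the {{audience}} audience."
text_substitutions = {
    "audience": "enterprise",
    "product_photo": "[Image: product_photo]",  # media variables become textual placeholders
}

def replace_variable(match: re.Match[str]) -> str:
    var_name = match.group(1).strip()
    return str(text_substitutions.get(var_name, match.group(0)))

pattern = r"\{\{\s*(\w+)\s*\}\}"
print(re.sub(pattern, replace_variable, cur_prompt_text))
# Describe [Image: product_photo] for the enterprise audience.
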
async def get_openai_response( async def get_openai_response(
self, self,
response_id: str, response_id: str,
@ -273,6 +355,14 @@ class OpenAIResponsesImpl:
guardrail_ids = extract_guardrail_ids(guardrails) if guardrails else [] guardrail_ids = extract_guardrail_ids(guardrails) if guardrails else []
# Validate that Safety API is available if guardrails are requested
if guardrail_ids and self.safety_api is None:
raise ValueError(
"Cannot process guardrails: Safety API is not configured.\n\n"
"To use guardrails, ensure the Safety API is configured in your stack, or remove "
"the 'guardrails' parameter from your request."
)
if conversation is not None: if conversation is not None:
if previous_response_id is not None: if previous_response_id is not None:
raise ValueError( raise ValueError(
@ -289,6 +379,7 @@ class OpenAIResponsesImpl:
input=input, input=input,
conversation=conversation, conversation=conversation,
model=model, model=model,
prompt=prompt,
instructions=instructions, instructions=instructions,
previous_response_id=previous_response_id, previous_response_id=previous_response_id,
store=store, store=store,
@ -342,6 +433,7 @@ class OpenAIResponsesImpl:
instructions: str | None = None, instructions: str | None = None,
previous_response_id: str | None = None, previous_response_id: str | None = None,
conversation: str | None = None, conversation: str | None = None,
prompt: OpenAIResponsePrompt | None = None,
store: bool | None = True, store: bool | None = True,
temperature: float | None = None, temperature: float | None = None,
text: OpenAIResponseText | None = None, text: OpenAIResponseText | None = None,
@ -364,6 +456,9 @@ class OpenAIResponsesImpl:
if instructions: if instructions:
messages.insert(0, OpenAISystemMessageParam(content=instructions)) messages.insert(0, OpenAISystemMessageParam(content=instructions))
# Prepend reusable prompt (if provided)
await self._prepend_prompt(messages, prompt)
# Structured outputs # Structured outputs
response_format = await convert_response_text_to_chat_response_format(text) response_format = await convert_response_text_to_chat_response_format(text)
@ -386,6 +481,7 @@ class OpenAIResponsesImpl:
ctx=ctx, ctx=ctx,
response_id=response_id, response_id=response_id,
created_at=created_at, created_at=created_at,
prompt=prompt,
text=text, text=text,
max_infer_iters=max_infer_iters, max_infer_iters=max_infer_iters,
parallel_tool_calls=parallel_tool_calls, parallel_tool_calls=parallel_tool_calls,


@ -66,6 +66,8 @@ from llama_stack_api import (
OpenAIResponseUsage, OpenAIResponseUsage,
OpenAIResponseUsageInputTokensDetails, OpenAIResponseUsageInputTokensDetails,
OpenAIResponseUsageOutputTokensDetails, OpenAIResponseUsageOutputTokensDetails,
OpenAIToolMessageParam,
Safety,
WebSearchToolTypes, WebSearchToolTypes,
) )
@ -111,7 +113,7 @@ class StreamingResponseOrchestrator:
max_infer_iters: int, max_infer_iters: int,
tool_executor, # Will be the tool execution logic from the main class tool_executor, # Will be the tool execution logic from the main class
instructions: str | None, instructions: str | None,
safety_api, safety_api: Safety | None,
guardrail_ids: list[str] | None = None, guardrail_ids: list[str] | None = None,
prompt: OpenAIResponsePrompt | None = None, prompt: OpenAIResponsePrompt | None = None,
parallel_tool_calls: bool | None = None, parallel_tool_calls: bool | None = None,
@ -905,10 +907,16 @@ class StreamingResponseOrchestrator:
"""Coordinate execution of both function and non-function tool calls.""" """Coordinate execution of both function and non-function tool calls."""
# Execute non-function tool calls # Execute non-function tool calls
for tool_call in non_function_tool_calls: for tool_call in non_function_tool_calls:
# Check if total calls made to built-in and mcp tools exceed max_tool_calls # if total calls made to built-in and mcp tools exceed max_tool_calls
# then create a tool response message indicating the call was skipped
if self.max_tool_calls is not None and self.accumulated_builtin_tool_calls >= self.max_tool_calls: if self.max_tool_calls is not None and self.accumulated_builtin_tool_calls >= self.max_tool_calls:
logger.info(f"Ignoring built-in and mcp tool call since reached the limit of {self.max_tool_calls=}.") logger.info(f"Ignoring built-in and mcp tool call since reached the limit of {self.max_tool_calls=}.")
break skipped_call_message = OpenAIToolMessageParam(
content=f"Tool call skipped: maximum tool calls limit ({self.max_tool_calls}) reached.",
tool_call_id=tool_call.id,
)
next_turn_messages.append(skipped_call_message)
continue
# Find the item_id for this tool call # Find the item_id for this tool call
matching_item_id = None matching_item_id = None


@ -5,11 +5,14 @@
# the root directory of this source tree. # the root directory of this source tree.
import asyncio import asyncio
import base64
import mimetypes
import re import re
import uuid import uuid
from collections.abc import Sequence from collections.abc import Sequence
from llama_stack_api import ( from llama_stack_api import (
Files,
OpenAIAssistantMessageParam, OpenAIAssistantMessageParam,
OpenAIChatCompletionContentPartImageParam, OpenAIChatCompletionContentPartImageParam,
OpenAIChatCompletionContentPartParam, OpenAIChatCompletionContentPartParam,
@ -18,6 +21,8 @@ from llama_stack_api import (
OpenAIChatCompletionToolCallFunction, OpenAIChatCompletionToolCallFunction,
OpenAIChoice, OpenAIChoice,
OpenAIDeveloperMessageParam, OpenAIDeveloperMessageParam,
OpenAIFile,
OpenAIFileFile,
OpenAIImageURL, OpenAIImageURL,
OpenAIJSONSchema, OpenAIJSONSchema,
OpenAIMessageParam, OpenAIMessageParam,
@ -29,6 +34,7 @@ from llama_stack_api import (
OpenAIResponseInput, OpenAIResponseInput,
OpenAIResponseInputFunctionToolCallOutput, OpenAIResponseInputFunctionToolCallOutput,
OpenAIResponseInputMessageContent, OpenAIResponseInputMessageContent,
OpenAIResponseInputMessageContentFile,
OpenAIResponseInputMessageContentImage, OpenAIResponseInputMessageContentImage,
OpenAIResponseInputMessageContentText, OpenAIResponseInputMessageContentText,
OpenAIResponseInputTool, OpenAIResponseInputTool,
@ -37,9 +43,11 @@ from llama_stack_api import (
OpenAIResponseMessage, OpenAIResponseMessage,
OpenAIResponseOutputMessageContent, OpenAIResponseOutputMessageContent,
OpenAIResponseOutputMessageContentOutputText, OpenAIResponseOutputMessageContentOutputText,
OpenAIResponseOutputMessageFileSearchToolCall,
OpenAIResponseOutputMessageFunctionToolCall, OpenAIResponseOutputMessageFunctionToolCall,
OpenAIResponseOutputMessageMCPCall, OpenAIResponseOutputMessageMCPCall,
OpenAIResponseOutputMessageMCPListTools, OpenAIResponseOutputMessageMCPListTools,
OpenAIResponseOutputMessageWebSearchToolCall,
OpenAIResponseText, OpenAIResponseText,
OpenAISystemMessageParam, OpenAISystemMessageParam,
OpenAIToolMessageParam, OpenAIToolMessageParam,
@ -49,6 +57,46 @@ from llama_stack_api import (
) )
async def extract_bytes_from_file(file_id: str, files_api: Files) -> bytes:
"""
Extract raw bytes from file using the Files API.
:param file_id: The file identifier (e.g., "file-abc123")
:param files_api: Files API instance
:returns: Raw file content as bytes
:raises: ValueError if file cannot be retrieved
"""
try:
response = await files_api.openai_retrieve_file_content(file_id)
return bytes(response.body)
except Exception as e:
raise ValueError(f"Failed to retrieve file content for file_id '{file_id}': {str(e)}") from e
def generate_base64_ascii_text_from_bytes(raw_bytes: bytes) -> str:
"""
Converts raw binary bytes into a safe ASCII text representation for URLs
:param raw_bytes: the actual bytes that represent the file content
:returns: base64-encoded ASCII string
"""
return base64.b64encode(raw_bytes).decode("utf-8")
def construct_data_url(ascii_text: str, mime_type: str | None) -> str:
"""
Construct a data URL embedding the base64-encoded content
:param ascii_text: base64-encoded ASCII content
:param mime_type: MIME type of the file
:returns: data URL string (e.g. data:text/html;base64,PGgxPkhlbGxv...)
"""
if not mime_type:
mime_type = "application/octet-stream"
return f"data:{mime_type};base64,{ascii_text}"
async def convert_chat_choice_to_response_message( async def convert_chat_choice_to_response_message(
choice: OpenAIChoice, choice: OpenAIChoice,
citation_files: dict[str, str] | None = None, citation_files: dict[str, str] | None = None,
@ -78,11 +126,15 @@ async def convert_chat_choice_to_response_message(
async def convert_response_content_to_chat_content( async def convert_response_content_to_chat_content(
content: str | Sequence[OpenAIResponseInputMessageContent | OpenAIResponseOutputMessageContent], content: str | Sequence[OpenAIResponseInputMessageContent | OpenAIResponseOutputMessageContent],
files_api: Files | None,
) -> str | list[OpenAIChatCompletionContentPartParam]: ) -> str | list[OpenAIChatCompletionContentPartParam]:
""" """
Convert the content parts from an OpenAI Response API request into OpenAI Chat Completion content parts. Convert the content parts from an OpenAI Response API request into OpenAI Chat Completion content parts.
The content schemas of each API look similar, but are not exactly the same. The content schemas of each API look similar, but are not exactly the same.
:param content: The content to convert
:param files_api: Files API for resolving file_id to raw file content (required if content contains files/images)
""" """
if isinstance(content, str): if isinstance(content, str):
return content return content
@ -95,9 +147,68 @@ async def convert_response_content_to_chat_content(
elif isinstance(content_part, OpenAIResponseOutputMessageContentOutputText): elif isinstance(content_part, OpenAIResponseOutputMessageContentOutputText):
converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part.text)) converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part.text))
elif isinstance(content_part, OpenAIResponseInputMessageContentImage): elif isinstance(content_part, OpenAIResponseInputMessageContentImage):
detail = content_part.detail
image_mime_type = None
if content_part.image_url: if content_part.image_url:
image_url = OpenAIImageURL(url=content_part.image_url, detail=content_part.detail) image_url = OpenAIImageURL(url=content_part.image_url, detail=detail)
converted_parts.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url)) converted_parts.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url))
elif content_part.file_id:
if files_api is None:
raise ValueError("file_ids are not supported by this implementation of the Stack")
image_file_response = await files_api.openai_retrieve_file(content_part.file_id)
if image_file_response.filename:
image_mime_type, _ = mimetypes.guess_type(image_file_response.filename)
raw_image_bytes = await extract_bytes_from_file(content_part.file_id, files_api)
ascii_text = generate_base64_ascii_text_from_bytes(raw_image_bytes)
image_data_url = construct_data_url(ascii_text, image_mime_type)
image_url = OpenAIImageURL(url=image_data_url, detail=detail)
converted_parts.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url))
else:
raise ValueError(
f"Image content must have either 'image_url' or 'file_id'. "
f"Got image_url={content_part.image_url}, file_id={content_part.file_id}"
)
elif isinstance(content_part, OpenAIResponseInputMessageContentFile):
resolved_file_data = None
file_data = content_part.file_data
file_id = content_part.file_id
file_url = content_part.file_url
filename = content_part.filename
file_mime_type = None
if not any([file_data, file_id, file_url]):
raise ValueError(
f"File content must have at least one of 'file_data', 'file_id', or 'file_url'. "
f"Got file_data={file_data}, file_id={file_id}, file_url={file_url}"
)
if file_id:
if files_api is None:
raise ValueError("file_ids are not supported by this implementation of the Stack")
file_response = await files_api.openai_retrieve_file(file_id)
if not filename:
filename = file_response.filename
file_mime_type, _ = mimetypes.guess_type(file_response.filename)
raw_file_bytes = await extract_bytes_from_file(file_id, files_api)
ascii_text = generate_base64_ascii_text_from_bytes(raw_file_bytes)
resolved_file_data = construct_data_url(ascii_text, file_mime_type)
elif file_data:
if file_data.startswith("data:"):
resolved_file_data = file_data
else:
# Raw base64 data, wrap in data URL format
if filename:
file_mime_type, _ = mimetypes.guess_type(filename)
resolved_file_data = construct_data_url(file_data, file_mime_type)
elif file_url:
resolved_file_data = file_url
converted_parts.append(
OpenAIFile(
file=OpenAIFileFile(
file_data=resolved_file_data,
filename=filename,
)
)
)
elif isinstance(content_part, str): elif isinstance(content_part, str):
converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part)) converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part))
else: else:
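
The image and file branches above rely on mimetypes.guess_type, which returns None for unrecognized extensions; that is why construct_data_url falls back to application/octet-stream. A small illustrative check using only the standard library:

    import mimetypes

    mime, _ = mimetypes.guess_type("report.pdf")   # ("application/pdf", None)
    unknown, _ = mimetypes.guess_type("blob.xyz")  # (None, None) -> falls back to application/octet-stream
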
@ -110,12 +221,14 @@ async def convert_response_content_to_chat_content(
async def convert_response_input_to_chat_messages( async def convert_response_input_to_chat_messages(
input: str | list[OpenAIResponseInput], input: str | list[OpenAIResponseInput],
previous_messages: list[OpenAIMessageParam] | None = None, previous_messages: list[OpenAIMessageParam] | None = None,
files_api: Files | None = None,
) -> list[OpenAIMessageParam]: ) -> list[OpenAIMessageParam]:
""" """
Convert the input from an OpenAI Response API request into OpenAI Chat Completion messages. Convert the input from an OpenAI Response API request into OpenAI Chat Completion messages.
:param input: The input to convert :param input: The input to convert
:param previous_messages: Optional previous messages to check for function_call references :param previous_messages: Optional previous messages to check for function_call references
:param files_api: Files API for resolving file_id to raw file content (optional, required for file/image content)
""" """
messages: list[OpenAIMessageParam] = [] messages: list[OpenAIMessageParam] = []
if isinstance(input, list): if isinstance(input, list):
@ -169,6 +282,12 @@ async def convert_response_input_to_chat_messages(
elif isinstance(input_item, OpenAIResponseOutputMessageMCPListTools): elif isinstance(input_item, OpenAIResponseOutputMessageMCPListTools):
# the tool list will be handled separately # the tool list will be handled separately
pass pass
elif isinstance(
input_item,
OpenAIResponseOutputMessageWebSearchToolCall | OpenAIResponseOutputMessageFileSearchToolCall,
):
# these tool calls are tracked internally but not converted to chat messages
pass
elif isinstance(input_item, OpenAIResponseMCPApprovalRequest) or isinstance( elif isinstance(input_item, OpenAIResponseMCPApprovalRequest) or isinstance(
input_item, OpenAIResponseMCPApprovalResponse input_item, OpenAIResponseMCPApprovalResponse
): ):
@ -176,7 +295,7 @@ async def convert_response_input_to_chat_messages(
pass pass
elif isinstance(input_item, OpenAIResponseMessage): elif isinstance(input_item, OpenAIResponseMessage):
# Narrow type to OpenAIResponseMessage which has content and role attributes # Narrow type to OpenAIResponseMessage which has content and role attributes
content = await convert_response_content_to_chat_content(input_item.content) content = await convert_response_content_to_chat_content(input_item.content, files_api)
message_type = await get_message_type_by_role(input_item.role) message_type = await get_message_type_by_role(input_item.role)
if message_type is None: if message_type is None:
raise ValueError( raise ValueError(
@ -320,11 +439,15 @@ def is_function_tool_call(
return False return False
async def run_guardrails(safety_api: Safety, messages: str, guardrail_ids: list[str]) -> str | None: async def run_guardrails(safety_api: Safety | None, messages: str, guardrail_ids: list[str]) -> str | None:
"""Run guardrails against messages and return violation message if blocked.""" """Run guardrails against messages and return violation message if blocked."""
if not messages: if not messages:
return None return None
# If safety API is not available, skip guardrails
if safety_api is None:
return None
# Look up shields to get their provider_resource_id (actual model ID) # Look up shields to get their provider_resource_id (actual model ID)
model_ids = [] model_ids = []
# TODO: list_shields not in Safety interface but available at runtime via API routing # TODO: list_shields not in Safety interface but available at runtime via API routing
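
With Api.safety now optional for the meta-reference agents provider (see the provider spec change below), run_guardrails simply skips evaluation when no Safety implementation is wired in. A minimal usage sketch with illustrative arguments:

    violation = await run_guardrails(None, "hello", ["llama-guard"])
    assert violation is None  # guardrails are skipped when the Safety API is unavailable
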

View file

@ -30,11 +30,15 @@ def available_providers() -> list[ProviderSpec]:
config_class="llama_stack.providers.inline.agents.meta_reference.MetaReferenceAgentsImplConfig", config_class="llama_stack.providers.inline.agents.meta_reference.MetaReferenceAgentsImplConfig",
api_dependencies=[ api_dependencies=[
Api.inference, Api.inference,
Api.safety,
Api.vector_io, Api.vector_io,
Api.tool_runtime, Api.tool_runtime,
Api.tool_groups, Api.tool_groups,
Api.conversations, Api.conversations,
Api.prompts,
Api.files,
],
optional_api_dependencies=[
Api.safety,
], ],
description="Meta's reference implementation of an agent system that can use tools, access vector databases, and perform complex reasoning tasks.", description="Meta's reference implementation of an agent system that can use tools, access vector databases, and perform complex reasoning tasks.",
), ),

View file

@ -4,8 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from urllib.parse import urljoin
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
from .config import AzureConfig from .config import AzureConfig
@ -22,4 +20,4 @@ class AzureInferenceAdapter(OpenAIMixin):
Returns the Azure API base URL from the configuration. Returns the Azure API base URL from the configuration.
""" """
return urljoin(str(self.config.api_base), "/openai/v1") return str(self.config.base_url)

View file

@ -32,8 +32,9 @@ class AzureProviderDataValidator(BaseModel):
@json_schema_type @json_schema_type
class AzureConfig(RemoteInferenceProviderConfig): class AzureConfig(RemoteInferenceProviderConfig):
api_base: HttpUrl = Field( base_url: HttpUrl | None = Field(
description="Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com)", default=None,
description="Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com/openai/v1)",
) )
api_version: str | None = Field( api_version: str | None = Field(
default_factory=lambda: os.getenv("AZURE_API_VERSION"), default_factory=lambda: os.getenv("AZURE_API_VERSION"),
@ -48,14 +49,14 @@ class AzureConfig(RemoteInferenceProviderConfig):
def sample_run_config( def sample_run_config(
cls, cls,
api_key: str = "${env.AZURE_API_KEY:=}", api_key: str = "${env.AZURE_API_KEY:=}",
api_base: str = "${env.AZURE_API_BASE:=}", base_url: str = "${env.AZURE_API_BASE:=}",
api_version: str = "${env.AZURE_API_VERSION:=}", api_version: str = "${env.AZURE_API_VERSION:=}",
api_type: str = "${env.AZURE_API_TYPE:=}", api_type: str = "${env.AZURE_API_TYPE:=}",
**kwargs, **kwargs,
) -> dict[str, Any]: ) -> dict[str, Any]:
return { return {
"api_key": api_key, "api_key": api_key,
"api_base": api_base, "base_url": base_url,
"api_version": api_version, "api_version": api_version,
"api_type": api_type, "api_type": api_type,
} }

View file

@ -37,7 +37,7 @@ class BedrockInferenceAdapter(OpenAIMixin):
""" """
config: BedrockConfig config: BedrockConfig
provider_data_api_key_field: str = "aws_bedrock_api_key" provider_data_api_key_field: str = "aws_bearer_token_bedrock"
def get_base_url(self) -> str: def get_base_url(self) -> str:
"""Get base URL for OpenAI client.""" """Get base URL for OpenAI client."""
@ -111,7 +111,7 @@ class BedrockInferenceAdapter(OpenAIMixin):
logger.error(f"AWS Bedrock authentication token expired: {error_msg}") logger.error(f"AWS Bedrock authentication token expired: {error_msg}")
raise ValueError( raise ValueError(
"AWS Bedrock authentication failed: Bearer token has expired. " "AWS Bedrock authentication failed: Bearer token has expired. "
"The AWS_BEDROCK_API_KEY environment variable contains an expired pre-signed URL. " "The AWS_BEARER_TOKEN_BEDROCK environment variable contains an expired pre-signed URL. "
"Please refresh your token by generating a new pre-signed URL with AWS credentials. " "Please refresh your token by generating a new pre-signed URL with AWS credentials. "
"Refer to AWS Bedrock documentation for details on OpenAI-compatible endpoints." "Refer to AWS Bedrock documentation for details on OpenAI-compatible endpoints."
) from e ) from e

View file

@ -12,9 +12,9 @@ from llama_stack.providers.utils.inference.model_registry import RemoteInference
class BedrockProviderDataValidator(BaseModel): class BedrockProviderDataValidator(BaseModel):
aws_bedrock_api_key: str | None = Field( aws_bearer_token_bedrock: str | None = Field(
default=None, default=None,
description="API key for Amazon Bedrock", description="API Key (Bearer token) for Amazon Bedrock",
) )
@ -27,6 +27,6 @@ class BedrockConfig(RemoteInferenceProviderConfig):
@classmethod @classmethod
def sample_run_config(cls, **kwargs): def sample_run_config(cls, **kwargs):
return { return {
"api_key": "${env.AWS_BEDROCK_API_KEY:=}", "api_key": "${env.AWS_BEARER_TOKEN_BEDROCK:=}",
"region_name": "${env.AWS_DEFAULT_REGION:=us-east-2}", "region_name": "${env.AWS_DEFAULT_REGION:=us-east-2}",
} }

View file

@ -4,8 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from urllib.parse import urljoin
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
from llama_stack_api import ( from llama_stack_api import (
OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsRequestWithExtraBody,
@ -21,7 +19,7 @@ class CerebrasInferenceAdapter(OpenAIMixin):
provider_data_api_key_field: str = "cerebras_api_key" provider_data_api_key_field: str = "cerebras_api_key"
def get_base_url(self) -> str: def get_base_url(self) -> str:
return urljoin(self.config.base_url, "v1") return str(self.config.base_url)
async def openai_embeddings( async def openai_embeddings(
self, self,

View file

@ -7,12 +7,12 @@
import os import os
from typing import Any from typing import Any
from pydantic import BaseModel, Field from pydantic import BaseModel, Field, HttpUrl
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack_api import json_schema_type from llama_stack_api import json_schema_type
DEFAULT_BASE_URL = "https://api.cerebras.ai" DEFAULT_BASE_URL = "https://api.cerebras.ai/v1"
class CerebrasProviderDataValidator(BaseModel): class CerebrasProviderDataValidator(BaseModel):
@ -24,8 +24,8 @@ class CerebrasProviderDataValidator(BaseModel):
@json_schema_type @json_schema_type
class CerebrasImplConfig(RemoteInferenceProviderConfig): class CerebrasImplConfig(RemoteInferenceProviderConfig):
base_url: str = Field( base_url: HttpUrl | None = Field(
default=os.environ.get("CEREBRAS_BASE_URL", DEFAULT_BASE_URL), default=HttpUrl(os.environ.get("CEREBRAS_BASE_URL", DEFAULT_BASE_URL)),
description="Base URL for the Cerebras API", description="Base URL for the Cerebras API",
) )

View file

@ -6,7 +6,7 @@
from typing import Any from typing import Any
from pydantic import BaseModel, Field, SecretStr from pydantic import BaseModel, Field, HttpUrl, SecretStr
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack_api import json_schema_type from llama_stack_api import json_schema_type
@ -21,9 +21,9 @@ class DatabricksProviderDataValidator(BaseModel):
@json_schema_type @json_schema_type
class DatabricksImplConfig(RemoteInferenceProviderConfig): class DatabricksImplConfig(RemoteInferenceProviderConfig):
url: str | None = Field( base_url: HttpUrl | None = Field(
default=None, default=None,
description="The URL for the Databricks model serving endpoint", description="The URL for the Databricks model serving endpoint (should include /serving-endpoints path)",
) )
auth_credential: SecretStr | None = Field( auth_credential: SecretStr | None = Field(
default=None, default=None,
@ -34,11 +34,11 @@ class DatabricksImplConfig(RemoteInferenceProviderConfig):
@classmethod @classmethod
def sample_run_config( def sample_run_config(
cls, cls,
url: str = "${env.DATABRICKS_HOST:=}", base_url: str = "${env.DATABRICKS_HOST:=}",
api_token: str = "${env.DATABRICKS_TOKEN:=}", api_token: str = "${env.DATABRICKS_TOKEN:=}",
**kwargs: Any, **kwargs: Any,
) -> dict[str, Any]: ) -> dict[str, Any]:
return { return {
"url": url, "base_url": base_url,
"api_token": api_token, "api_token": api_token,
} }

View file

@ -29,15 +29,21 @@ class DatabricksInferenceAdapter(OpenAIMixin):
} }
def get_base_url(self) -> str: def get_base_url(self) -> str:
return f"{self.config.url}/serving-endpoints" return str(self.config.base_url)
async def list_provider_model_ids(self) -> Iterable[str]: async def list_provider_model_ids(self) -> Iterable[str]:
# Filter out None values from endpoint names # Filter out None values from endpoint names
api_token = self._get_api_key_from_config_or_provider_data() api_token = self._get_api_key_from_config_or_provider_data()
# WorkspaceClient expects base host without /serving-endpoints suffix
base_url_str = str(self.config.base_url)
if base_url_str.endswith("/serving-endpoints"):
host = base_url_str[:-18] # Remove '/serving-endpoints'
else:
host = base_url_str
return [ return [
endpoint.name # type: ignore[misc] endpoint.name # type: ignore[misc]
for endpoint in WorkspaceClient( for endpoint in WorkspaceClient(
host=self.config.url, token=api_token host=host, token=api_token
).serving_endpoints.list() # TODO: this is not async ).serving_endpoints.list() # TODO: this is not async
] ]
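
The 18-character slice above removes a fixed "/serving-endpoints" suffix before handing the host to WorkspaceClient. An equivalent spelling using str.removesuffix (Python 3.9+), shown as a sketch rather than the shipped code:

    base_url_str = "https://example.cloud.databricks.com/serving-endpoints"  # illustrative value
    host = base_url_str.removesuffix("/serving-endpoints")                   # strips the suffix only when present
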

View file

@ -6,7 +6,7 @@
from typing import Any from typing import Any
from pydantic import Field from pydantic import Field, HttpUrl
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack_api import json_schema_type from llama_stack_api import json_schema_type
@ -14,14 +14,14 @@ from llama_stack_api import json_schema_type
@json_schema_type @json_schema_type
class FireworksImplConfig(RemoteInferenceProviderConfig): class FireworksImplConfig(RemoteInferenceProviderConfig):
url: str = Field( base_url: HttpUrl | None = Field(
default="https://api.fireworks.ai/inference/v1", default=HttpUrl("https://api.fireworks.ai/inference/v1"),
description="The URL for the Fireworks server", description="The URL for the Fireworks server",
) )
@classmethod @classmethod
def sample_run_config(cls, api_key: str = "${env.FIREWORKS_API_KEY:=}", **kwargs) -> dict[str, Any]: def sample_run_config(cls, api_key: str = "${env.FIREWORKS_API_KEY:=}", **kwargs) -> dict[str, Any]:
return { return {
"url": "https://api.fireworks.ai/inference/v1", "base_url": "https://api.fireworks.ai/inference/v1",
"api_key": api_key, "api_key": api_key,
} }

View file

@ -24,4 +24,4 @@ class FireworksInferenceAdapter(OpenAIMixin):
provider_data_api_key_field: str = "fireworks_api_key" provider_data_api_key_field: str = "fireworks_api_key"
def get_base_url(self) -> str: def get_base_url(self) -> str:
return "https://api.fireworks.ai/inference/v1" return str(self.config.base_url)

View file

@ -6,7 +6,7 @@
from typing import Any from typing import Any
from pydantic import BaseModel, Field from pydantic import BaseModel, Field, HttpUrl
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack_api import json_schema_type from llama_stack_api import json_schema_type
@ -21,14 +21,14 @@ class GroqProviderDataValidator(BaseModel):
@json_schema_type @json_schema_type
class GroqConfig(RemoteInferenceProviderConfig): class GroqConfig(RemoteInferenceProviderConfig):
url: str = Field( base_url: HttpUrl | None = Field(
default="https://api.groq.com", default=HttpUrl("https://api.groq.com/openai/v1"),
description="The URL for the Groq AI server", description="The URL for the Groq AI server",
) )
@classmethod @classmethod
def sample_run_config(cls, api_key: str = "${env.GROQ_API_KEY:=}", **kwargs) -> dict[str, Any]: def sample_run_config(cls, api_key: str = "${env.GROQ_API_KEY:=}", **kwargs) -> dict[str, Any]:
return { return {
"url": "https://api.groq.com", "base_url": "https://api.groq.com/openai/v1",
"api_key": api_key, "api_key": api_key,
} }

View file

@ -15,4 +15,4 @@ class GroqInferenceAdapter(OpenAIMixin):
provider_data_api_key_field: str = "groq_api_key" provider_data_api_key_field: str = "groq_api_key"
def get_base_url(self) -> str: def get_base_url(self) -> str:
return f"{self.config.url}/openai/v1" return str(self.config.base_url)

View file

@ -6,7 +6,7 @@
from typing import Any from typing import Any
from pydantic import BaseModel, Field from pydantic import BaseModel, Field, HttpUrl
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack_api import json_schema_type from llama_stack_api import json_schema_type
@ -21,14 +21,14 @@ class LlamaProviderDataValidator(BaseModel):
@json_schema_type @json_schema_type
class LlamaCompatConfig(RemoteInferenceProviderConfig): class LlamaCompatConfig(RemoteInferenceProviderConfig):
openai_compat_api_base: str = Field( base_url: HttpUrl | None = Field(
default="https://api.llama.com/compat/v1/", default=HttpUrl("https://api.llama.com/compat/v1/"),
description="The URL for the Llama API server", description="The URL for the Llama API server",
) )
@classmethod @classmethod
def sample_run_config(cls, api_key: str = "${env.LLAMA_API_KEY}", **kwargs) -> dict[str, Any]: def sample_run_config(cls, api_key: str = "${env.LLAMA_API_KEY}", **kwargs) -> dict[str, Any]:
return { return {
"openai_compat_api_base": "https://api.llama.com/compat/v1/", "base_url": "https://api.llama.com/compat/v1/",
"api_key": api_key, "api_key": api_key,
} }

View file

@ -31,7 +31,7 @@ class LlamaCompatInferenceAdapter(OpenAIMixin):
:return: The Llama API base URL :return: The Llama API base URL
""" """
return self.config.openai_compat_api_base return str(self.config.base_url)
async def openai_completion( async def openai_completion(
self, self,

View file

@ -7,7 +7,7 @@
import os import os
from typing import Any from typing import Any
from pydantic import BaseModel, Field from pydantic import BaseModel, Field, HttpUrl
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack_api import json_schema_type from llama_stack_api import json_schema_type
@ -44,18 +44,14 @@ class NVIDIAConfig(RemoteInferenceProviderConfig):
URL of your running NVIDIA NIM and do not need to set the api_key. URL of your running NVIDIA NIM and do not need to set the api_key.
""" """
url: str = Field( base_url: HttpUrl | None = Field(
default_factory=lambda: os.getenv("NVIDIA_BASE_URL", "https://integrate.api.nvidia.com"), default_factory=lambda: os.getenv("NVIDIA_BASE_URL", "https://integrate.api.nvidia.com/v1"),
description="A base url for accessing the NVIDIA NIM", description="A base url for accessing the NVIDIA NIM",
) )
timeout: int = Field( timeout: int = Field(
default=60, default=60,
description="Timeout for the HTTP requests", description="Timeout for the HTTP requests",
) )
append_api_version: bool = Field(
default_factory=lambda: os.getenv("NVIDIA_APPEND_API_VERSION", "True").lower() != "false",
description="When set to false, the API version will not be appended to the base_url. By default, it is true.",
)
rerank_model_to_url: dict[str, str] = Field( rerank_model_to_url: dict[str, str] = Field(
default_factory=lambda: { default_factory=lambda: {
"nv-rerank-qa-mistral-4b:1": "https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking", "nv-rerank-qa-mistral-4b:1": "https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking",
@ -68,13 +64,11 @@ class NVIDIAConfig(RemoteInferenceProviderConfig):
@classmethod @classmethod
def sample_run_config( def sample_run_config(
cls, cls,
url: str = "${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}", base_url: HttpUrl | None = "${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}",
api_key: str = "${env.NVIDIA_API_KEY:=}", api_key: str = "${env.NVIDIA_API_KEY:=}",
append_api_version: bool = "${env.NVIDIA_APPEND_API_VERSION:=True}",
**kwargs, **kwargs,
) -> dict[str, Any]: ) -> dict[str, Any]:
return { return {
"url": url, "base_url": base_url,
"api_key": api_key, "api_key": api_key,
"append_api_version": append_api_version,
} }

View file

@ -44,7 +44,7 @@ class NVIDIAInferenceAdapter(OpenAIMixin):
} }
async def initialize(self) -> None: async def initialize(self) -> None:
logger.info(f"Initializing NVIDIAInferenceAdapter({self.config.url})...") logger.info(f"Initializing NVIDIAInferenceAdapter({self.config.base_url})...")
if _is_nvidia_hosted(self.config): if _is_nvidia_hosted(self.config):
if not self.config.auth_credential: if not self.config.auth_credential:
@ -72,7 +72,7 @@ class NVIDIAInferenceAdapter(OpenAIMixin):
:return: The NVIDIA API base URL :return: The NVIDIA API base URL
""" """
return f"{self.config.url}/v1" if self.config.append_api_version else self.config.url return str(self.config.base_url)
async def list_provider_model_ids(self) -> Iterable[str]: async def list_provider_model_ids(self) -> Iterable[str]:
""" """

View file

@ -8,4 +8,4 @@ from . import NVIDIAConfig
def _is_nvidia_hosted(config: NVIDIAConfig) -> bool: def _is_nvidia_hosted(config: NVIDIAConfig) -> bool:
return "integrate.api.nvidia.com" in config.url return "integrate.api.nvidia.com" in str(config.base_url)

View file

@ -6,20 +6,22 @@
from typing import Any from typing import Any
from pydantic import Field, SecretStr from pydantic import Field, HttpUrl, SecretStr
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
DEFAULT_OLLAMA_URL = "http://localhost:11434" DEFAULT_OLLAMA_URL = "http://localhost:11434/v1"
class OllamaImplConfig(RemoteInferenceProviderConfig): class OllamaImplConfig(RemoteInferenceProviderConfig):
auth_credential: SecretStr | None = Field(default=None, exclude=True) auth_credential: SecretStr | None = Field(default=None, exclude=True)
url: str = DEFAULT_OLLAMA_URL base_url: HttpUrl | None = Field(default=HttpUrl(DEFAULT_OLLAMA_URL))
@classmethod @classmethod
def sample_run_config(cls, url: str = "${env.OLLAMA_URL:=http://localhost:11434}", **kwargs) -> dict[str, Any]: def sample_run_config(
cls, base_url: str = "${env.OLLAMA_URL:=http://localhost:11434/v1}", **kwargs
) -> dict[str, Any]:
return { return {
"url": url, "base_url": base_url,
} }

View file

@ -55,17 +55,23 @@ class OllamaInferenceAdapter(OpenAIMixin):
# ollama client attaches itself to the current event loop (sadly?) # ollama client attaches itself to the current event loop (sadly?)
loop = asyncio.get_running_loop() loop = asyncio.get_running_loop()
if loop not in self._clients: if loop not in self._clients:
self._clients[loop] = AsyncOllamaClient(host=self.config.url) # Ollama client expects base URL without /v1 suffix
base_url_str = str(self.config.base_url)
if base_url_str.endswith("/v1"):
host = base_url_str[:-3]
else:
host = base_url_str
self._clients[loop] = AsyncOllamaClient(host=host)
return self._clients[loop] return self._clients[loop]
def get_api_key(self): def get_api_key(self):
return "NO KEY REQUIRED" return "NO KEY REQUIRED"
def get_base_url(self): def get_base_url(self):
return self.config.url.rstrip("/") + "/v1" return str(self.config.base_url)
async def initialize(self) -> None: async def initialize(self) -> None:
logger.info(f"checking connectivity to Ollama at `{self.config.url}`...") logger.info(f"checking connectivity to Ollama at `{self.config.base_url}`...")
r = await self.health() r = await self.health()
if r["status"] == HealthStatus.ERROR: if r["status"] == HealthStatus.ERROR:
logger.warning( logger.warning(

View file

@ -6,7 +6,7 @@
from typing import Any from typing import Any
from pydantic import BaseModel, Field from pydantic import BaseModel, Field, HttpUrl
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack_api import json_schema_type from llama_stack_api import json_schema_type
@ -21,8 +21,8 @@ class OpenAIProviderDataValidator(BaseModel):
@json_schema_type @json_schema_type
class OpenAIConfig(RemoteInferenceProviderConfig): class OpenAIConfig(RemoteInferenceProviderConfig):
base_url: str = Field( base_url: HttpUrl | None = Field(
default="https://api.openai.com/v1", default=HttpUrl("https://api.openai.com/v1"),
description="Base URL for OpenAI API", description="Base URL for OpenAI API",
) )

View file

@ -35,4 +35,4 @@ class OpenAIInferenceAdapter(OpenAIMixin):
Returns the OpenAI API base URL from the configuration. Returns the OpenAI API base URL from the configuration.
""" """
return self.config.base_url return str(self.config.base_url)
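
These provider config changes converge on the same convention: a pydantic HttpUrl field named base_url that is validated when the config is parsed and converted back to a plain string inside get_base_url(). A minimal sketch, assuming pydantic v2 URL semantics:

    from pydantic import HttpUrl

    base_url = HttpUrl("https://api.openai.com/v1")  # invalid values fail here, not at request time
    client_base = str(base_url)                      # adapters hand the plain string to the OpenAI client
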

View file

@ -6,7 +6,7 @@
from typing import Any from typing import Any
from pydantic import Field from pydantic import Field, HttpUrl
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack_api import json_schema_type from llama_stack_api import json_schema_type
@ -14,16 +14,16 @@ from llama_stack_api import json_schema_type
@json_schema_type @json_schema_type
class PassthroughImplConfig(RemoteInferenceProviderConfig): class PassthroughImplConfig(RemoteInferenceProviderConfig):
url: str = Field( base_url: HttpUrl | None = Field(
default=None, default=None,
description="The URL for the passthrough endpoint", description="The URL for the passthrough endpoint",
) )
@classmethod @classmethod
def sample_run_config( def sample_run_config(
cls, url: str = "${env.PASSTHROUGH_URL}", api_key: str = "${env.PASSTHROUGH_API_KEY}", **kwargs cls, base_url: HttpUrl | None = "${env.PASSTHROUGH_URL}", api_key: str = "${env.PASSTHROUGH_API_KEY}", **kwargs
) -> dict[str, Any]: ) -> dict[str, Any]:
return { return {
"url": url, "base_url": base_url,
"api_key": api_key, "api_key": api_key,
} }

View file

@ -82,8 +82,8 @@ class PassthroughInferenceAdapter(NeedsRequestProviderData, Inference):
def _get_passthrough_url(self) -> str: def _get_passthrough_url(self) -> str:
"""Get the passthrough URL from config or provider data.""" """Get the passthrough URL from config or provider data."""
if self.config.url is not None: if self.config.base_url is not None:
return self.config.url return str(self.config.base_url)
provider_data = self.get_request_provider_data() provider_data = self.get_request_provider_data()
if provider_data is None: if provider_data is None:

View file

@ -6,7 +6,7 @@
from typing import Any from typing import Any
from pydantic import BaseModel, Field, SecretStr from pydantic import BaseModel, Field, HttpUrl, SecretStr
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack_api import json_schema_type from llama_stack_api import json_schema_type
@ -21,7 +21,7 @@ class RunpodProviderDataValidator(BaseModel):
@json_schema_type @json_schema_type
class RunpodImplConfig(RemoteInferenceProviderConfig): class RunpodImplConfig(RemoteInferenceProviderConfig):
url: str | None = Field( base_url: HttpUrl | None = Field(
default=None, default=None,
description="The URL for the Runpod model serving endpoint", description="The URL for the Runpod model serving endpoint",
) )
@ -34,6 +34,6 @@ class RunpodImplConfig(RemoteInferenceProviderConfig):
@classmethod @classmethod
def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]: def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]:
return { return {
"url": "${env.RUNPOD_URL:=}", "base_url": "${env.RUNPOD_URL:=}",
"api_token": "${env.RUNPOD_API_TOKEN}", "api_token": "${env.RUNPOD_API_TOKEN}",
} }

View file

@ -28,7 +28,7 @@ class RunpodInferenceAdapter(OpenAIMixin):
def get_base_url(self) -> str: def get_base_url(self) -> str:
"""Get base URL for OpenAI client.""" """Get base URL for OpenAI client."""
return self.config.url return str(self.config.base_url)
async def openai_chat_completion( async def openai_chat_completion(
self, self,

View file

@ -6,7 +6,7 @@
from typing import Any from typing import Any
from pydantic import BaseModel, Field from pydantic import BaseModel, Field, HttpUrl
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack_api import json_schema_type from llama_stack_api import json_schema_type
@ -21,14 +21,14 @@ class SambaNovaProviderDataValidator(BaseModel):
@json_schema_type @json_schema_type
class SambaNovaImplConfig(RemoteInferenceProviderConfig): class SambaNovaImplConfig(RemoteInferenceProviderConfig):
url: str = Field( base_url: HttpUrl | None = Field(
default="https://api.sambanova.ai/v1", default=HttpUrl("https://api.sambanova.ai/v1"),
description="The URL for the SambaNova AI server", description="The URL for the SambaNova AI server",
) )
@classmethod @classmethod
def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY:=}", **kwargs) -> dict[str, Any]: def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY:=}", **kwargs) -> dict[str, Any]:
return { return {
"url": "https://api.sambanova.ai/v1", "base_url": "https://api.sambanova.ai/v1",
"api_key": api_key, "api_key": api_key,
} }

View file

@ -25,4 +25,4 @@ class SambaNovaInferenceAdapter(OpenAIMixin):
:return: The SambaNova base URL :return: The SambaNova base URL
""" """
return self.config.url return str(self.config.base_url)

View file

@ -5,7 +5,7 @@
# the root directory of this source tree. # the root directory of this source tree.
from pydantic import BaseModel, Field, SecretStr from pydantic import BaseModel, Field, HttpUrl, SecretStr
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack_api import json_schema_type from llama_stack_api import json_schema_type
@ -15,18 +15,19 @@ from llama_stack_api import json_schema_type
class TGIImplConfig(RemoteInferenceProviderConfig): class TGIImplConfig(RemoteInferenceProviderConfig):
auth_credential: SecretStr | None = Field(default=None, exclude=True) auth_credential: SecretStr | None = Field(default=None, exclude=True)
url: str = Field( base_url: HttpUrl | None = Field(
description="The URL for the TGI serving endpoint", default=None,
description="The URL for the TGI serving endpoint (should include /v1 path)",
) )
@classmethod @classmethod
def sample_run_config( def sample_run_config(
cls, cls,
url: str = "${env.TGI_URL:=}", base_url: str = "${env.TGI_URL:=}",
**kwargs, **kwargs,
): ):
return { return {
"url": url, "base_url": base_url,
} }

View file

@ -8,7 +8,7 @@
from collections.abc import Iterable from collections.abc import Iterable
from huggingface_hub import AsyncInferenceClient, HfApi from huggingface_hub import AsyncInferenceClient, HfApi
from pydantic import SecretStr from pydantic import HttpUrl, SecretStr
from llama_stack.log import get_logger from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
@ -23,7 +23,7 @@ log = get_logger(name=__name__, category="inference::tgi")
class _HfAdapter(OpenAIMixin): class _HfAdapter(OpenAIMixin):
url: str base_url: HttpUrl
api_key: SecretStr api_key: SecretStr
hf_client: AsyncInferenceClient hf_client: AsyncInferenceClient
@ -36,7 +36,7 @@ class _HfAdapter(OpenAIMixin):
return "NO KEY REQUIRED" return "NO KEY REQUIRED"
def get_base_url(self): def get_base_url(self):
return self.url return self.base_url
async def list_provider_model_ids(self) -> Iterable[str]: async def list_provider_model_ids(self) -> Iterable[str]:
return [self.model_id] return [self.model_id]
@ -50,14 +50,20 @@ class _HfAdapter(OpenAIMixin):
class TGIAdapter(_HfAdapter): class TGIAdapter(_HfAdapter):
async def initialize(self, config: TGIImplConfig) -> None: async def initialize(self, config: TGIImplConfig) -> None:
if not config.url: if not config.base_url:
raise ValueError("You must provide a URL in run.yaml (or via the TGI_URL environment variable) to use TGI.") raise ValueError("You must provide a URL in run.yaml (or via the TGI_URL environment variable) to use TGI.")
log.info(f"Initializing TGI client with url={config.url}") log.info(f"Initializing TGI client with url={config.base_url}")
self.hf_client = AsyncInferenceClient(model=config.url, provider="hf-inference") # Extract base URL without /v1 for HF client initialization
base_url_str = str(config.base_url).rstrip("/")
if base_url_str.endswith("/v1"):
base_url_for_client = base_url_str[:-3]
else:
base_url_for_client = base_url_str
self.hf_client = AsyncInferenceClient(model=base_url_for_client, provider="hf-inference")
endpoint_info = await self.hf_client.get_endpoint_info() endpoint_info = await self.hf_client.get_endpoint_info()
self.max_tokens = endpoint_info["max_total_tokens"] self.max_tokens = endpoint_info["max_total_tokens"]
self.model_id = endpoint_info["model_id"] self.model_id = endpoint_info["model_id"]
self.url = f"{config.url.rstrip('/')}/v1" self.base_url = config.base_url
self.api_key = SecretStr("NO_KEY") self.api_key = SecretStr("NO_KEY")

View file

@ -6,7 +6,7 @@
from typing import Any from typing import Any
from pydantic import Field from pydantic import Field, HttpUrl
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack_api import json_schema_type from llama_stack_api import json_schema_type
@ -14,14 +14,14 @@ from llama_stack_api import json_schema_type
@json_schema_type @json_schema_type
class TogetherImplConfig(RemoteInferenceProviderConfig): class TogetherImplConfig(RemoteInferenceProviderConfig):
url: str = Field( base_url: HttpUrl | None = Field(
default="https://api.together.xyz/v1", default=HttpUrl("https://api.together.xyz/v1"),
description="The URL for the Together AI server", description="The URL for the Together AI server",
) )
@classmethod @classmethod
def sample_run_config(cls, **kwargs) -> dict[str, Any]: def sample_run_config(cls, **kwargs) -> dict[str, Any]:
return { return {
"url": "https://api.together.xyz/v1", "base_url": "https://api.together.xyz/v1",
"api_key": "${env.TOGETHER_API_KEY:=}", "api_key": "${env.TOGETHER_API_KEY:=}",
} }

View file

@ -9,7 +9,6 @@ from collections.abc import Iterable
from typing import Any, cast from typing import Any, cast
from together import AsyncTogether # type: ignore[import-untyped] from together import AsyncTogether # type: ignore[import-untyped]
from together.constants import BASE_URL # type: ignore[import-untyped]
from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.core.request_headers import NeedsRequestProviderData
from llama_stack.log import get_logger from llama_stack.log import get_logger
@ -42,7 +41,7 @@ class TogetherInferenceAdapter(OpenAIMixin, NeedsRequestProviderData):
provider_data_api_key_field: str = "together_api_key" provider_data_api_key_field: str = "together_api_key"
def get_base_url(self): def get_base_url(self):
return BASE_URL return str(self.config.base_url)
def _get_client(self) -> AsyncTogether: def _get_client(self) -> AsyncTogether:
together_api_key = None together_api_key = None

View file

@ -51,4 +51,4 @@ class VertexAIInferenceAdapter(OpenAIMixin):
:return: An iterable of model IDs :return: An iterable of model IDs
""" """
return ["vertexai/gemini-2.0-flash", "vertexai/gemini-2.5-flash", "vertexai/gemini-2.5-pro"] return ["google/gemini-2.0-flash", "google/gemini-2.5-flash", "google/gemini-2.5-pro"]

View file

@ -6,7 +6,7 @@
from pathlib import Path from pathlib import Path
from pydantic import Field, SecretStr, field_validator from pydantic import Field, HttpUrl, SecretStr, field_validator
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack_api import json_schema_type from llama_stack_api import json_schema_type
@ -14,7 +14,7 @@ from llama_stack_api import json_schema_type
@json_schema_type @json_schema_type
class VLLMInferenceAdapterConfig(RemoteInferenceProviderConfig): class VLLMInferenceAdapterConfig(RemoteInferenceProviderConfig):
url: str | None = Field( base_url: HttpUrl | None = Field(
default=None, default=None,
description="The URL for the vLLM model serving endpoint", description="The URL for the vLLM model serving endpoint",
) )
@ -48,11 +48,11 @@ class VLLMInferenceAdapterConfig(RemoteInferenceProviderConfig):
@classmethod @classmethod
def sample_run_config( def sample_run_config(
cls, cls,
url: str = "${env.VLLM_URL:=}", base_url: str = "${env.VLLM_URL:=}",
**kwargs, **kwargs,
): ):
return { return {
"url": url, "base_url": base_url,
"max_tokens": "${env.VLLM_MAX_TOKENS:=4096}", "max_tokens": "${env.VLLM_MAX_TOKENS:=4096}",
"api_token": "${env.VLLM_API_TOKEN:=fake}", "api_token": "${env.VLLM_API_TOKEN:=fake}",
"tls_verify": "${env.VLLM_TLS_VERIFY:=true}", "tls_verify": "${env.VLLM_TLS_VERIFY:=true}",

View file

@ -39,12 +39,12 @@ class VLLMInferenceAdapter(OpenAIMixin):
def get_base_url(self) -> str: def get_base_url(self) -> str:
"""Get the base URL from config.""" """Get the base URL from config."""
if not self.config.url: if not self.config.base_url:
raise ValueError("No base URL configured") raise ValueError("No base URL configured")
return self.config.url return str(self.config.base_url)
async def initialize(self) -> None: async def initialize(self) -> None:
if not self.config.url: if not self.config.base_url:
raise ValueError( raise ValueError(
"You must provide a URL in run.yaml (or via the VLLM_URL environment variable) to use vLLM." "You must provide a URL in run.yaml (or via the VLLM_URL environment variable) to use vLLM."
) )

Some files were not shown because too many files have changed in this diff.