Merge branch 'main' into custom-collection-name-vectordb

Roy Belio 2025-11-20 19:37:06 +02:00 committed by GitHub
commit 3f5576b7d6
117 changed files with 16294 additions and 769 deletions

.github/CODEOWNERS

@@ -2,4 +2,4 @@
# These owners will be the default owners for everything in
# the repo. Unless a later match takes precedence,
-* @ashwinb @yanxi0830 @hardikjshah @raghotham @ehhuang @leseb @bbrowning @reluctantfuturist @mattf @slekkala1 @franciscojavierarceo
+* @ashwinb @raghotham @ehhuang @leseb @bbrowning @mattf @franciscojavierarceo


@@ -0,0 +1,35 @@
name: Setup TypeScript client
description: Conditionally checkout and link llama-stack-client-typescript based on client-version
inputs:
client-version:
description: 'Client version (latest or published)'
required: true
outputs:
ts-client-path:
description: 'Path or version to use for TypeScript client'
value: ${{ steps.set-path.outputs.ts-client-path }}
runs:
using: "composite"
steps:
- name: Checkout TypeScript client (latest)
if: ${{ inputs.client-version == 'latest' }}
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with:
repository: llamastack/llama-stack-client-typescript
ref: main
path: .ts-client-checkout
- name: Set TS_CLIENT_PATH
id: set-path
shell: bash
run: |
if [ "${{ inputs.client-version }}" = "latest" ]; then
echo "ts-client-path=${{ github.workspace }}/.ts-client-checkout" >> $GITHUB_OUTPUT
elif [ "${{ inputs.client-version }}" = "published" ]; then
echo "ts-client-path=^0.3.2" >> $GITHUB_OUTPUT
else
echo "::error::Invalid client-version: ${{ inputs.client-version }}"
exit 1
fi
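The action's final step maps the `client-version` input to an output: `latest` resolves to the local checkout path, `published` resolves to the pinned npm range, and anything else fails the job. A rough Python rendering of that resolution logic, for illustration only (the function name is made up; the checkout path and the `^0.3.2` pin come from the step above):

```python
import os


def resolve_ts_client_path(client_version: str, workspace: str) -> str:
    """Mirror the composite action's ts-client-path output."""
    if client_version == "latest":
        # the checkout step above places the client repo under .ts-client-checkout
        return os.path.join(workspace, ".ts-client-checkout")
    if client_version == "published":
        # pinned npm release used when testing against the published package
        return "^0.3.2"
    raise ValueError(f"Invalid client-version: {client_version}")
```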


@@ -93,11 +93,27 @@ jobs:
suite: ${{ matrix.config.suite }}
inference-mode: 'replay'
- name: Setup Node.js for TypeScript client tests
if: ${{ matrix.client == 'server' }}
uses: actions/setup-node@39370e3970a6d050c480ffad4ff0ed4d3fdee5af # v4.1.0
with:
node-version: '20'
cache: 'npm'
cache-dependency-path: tests/integration/client-typescript/package-lock.json
- name: Setup TypeScript client
if: ${{ matrix.client == 'server' }}
id: setup-ts-client
uses: ./.github/actions/setup-typescript-client
with:
client-version: ${{ matrix.client-version }}
- name: Run tests
if: ${{ matrix.config.allowed_clients == null || contains(matrix.config.allowed_clients, matrix.client) }}
uses: ./.github/actions/run-and-record-tests
env:
OPENAI_API_KEY: dummy
TS_CLIENT_PATH: ${{ steps.setup-ts-client.outputs.ts-client-path || '' }}
with:
stack-config: >-
${{ matrix.config.stack_config


@@ -43,7 +43,41 @@ env:
# Stainless organization dashboard
jobs:
compute-branch:
runs-on: ubuntu-latest
outputs:
preview_branch: ${{ steps.compute.outputs.preview_branch }}
base_branch: ${{ steps.compute.outputs.base_branch }}
merge_branch: ${{ steps.compute.outputs.merge_branch }}
steps:
- name: Compute branch names
id: compute
run: |
HEAD_REPO="${{ github.event.pull_request.head.repo.full_name }}"
BASE_REPO="${{ github.repository }}"
BRANCH_NAME="${{ github.event.pull_request.head.ref }}"
FORK_OWNER="${{ github.event.pull_request.head.repo.owner.login }}"
if [ "$HEAD_REPO" != "$BASE_REPO" ]; then
# Fork PR: prefix with fork owner for isolation
if [ -z "$FORK_OWNER" ]; then
echo "Error: Fork PR detected but fork owner is empty" >&2
exit 1
fi
PREVIEW_BRANCH="preview/${FORK_OWNER}/${BRANCH_NAME}"
BASE_BRANCH="preview/base/${FORK_OWNER}/${BRANCH_NAME}"
else
# Same-repo PR
PREVIEW_BRANCH="preview/${BRANCH_NAME}"
BASE_BRANCH="preview/base/${BRANCH_NAME}"
fi
echo "preview_branch=${PREVIEW_BRANCH}" >> $GITHUB_OUTPUT
echo "base_branch=${BASE_BRANCH}" >> $GITHUB_OUTPUT
echo "merge_branch=${PREVIEW_BRANCH}" >> $GITHUB_OUTPUT
preview:
needs: compute-branch
if: github.event.action != 'closed'
runs-on: ubuntu-latest
permissions:
@@ -59,8 +93,6 @@ jobs:
ref: ${{ github.event.pull_request.head.sha }}
fetch-depth: 2
# This action builds preview SDKs from the OpenAPI spec changes and
# posts/updates a comment on the PR with build results and links to the preview.
- name: Run preview builds
uses: stainless-api/upload-openapi-spec-action/preview@32823b096b4319c53ee948d702d9052873af485f # 1.6.0
with:
@@ -73,8 +105,11 @@
base_sha: ${{ github.event.pull_request.base.sha }}
base_ref: ${{ github.event.pull_request.base.ref }}
head_sha: ${{ github.event.pull_request.head.sha }}
branch: ${{ needs.compute-branch.outputs.preview_branch }}
base_branch: ${{ needs.compute-branch.outputs.base_branch }}
merge:
needs: compute-branch
if: github.event.action == 'closed' && github.event.pull_request.merged == true
runs-on: ubuntu-latest
permissions:
@@ -91,11 +126,11 @@
fetch-depth: 2
# Note that this only merges in changes that happened on the last build on
-# preview/${{ github.head_ref }}. It's possible that there are OAS/config
-# changes that haven't been built, if the preview-sdk job didn't finish
+# the computed preview branch. It's possible that there are OAS/config
+# changes that haven't been built, if the preview job didn't finish
# before this step starts. In theory we want to wait for all builds
-# against preview/${{ github.head_ref }} to complete, but assuming that
-# the preview-sdk job happens before the PR merge, it should be fine.
+# against the preview branch to complete, but assuming that
+# the preview job happens before the PR merge, it should be fine.
- name: Run merge build
uses: stainless-api/upload-openapi-spec-action/merge@32823b096b4319c53ee948d702d9052873af485f # 1.6.0
with:
@@ -108,3 +143,4 @@ jobs:
base_sha: ${{ github.event.pull_request.base.sha }}
base_ref: ${{ github.event.pull_request.base.ref }}
head_sha: ${{ github.event.pull_request.head.sha }}
merge_branch: ${{ needs.compute-branch.outputs.merge_branch }}
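The new compute-branch job above isolates fork PRs by folding the fork owner into the Stainless preview branch names; same-repo PRs keep the plain `preview/<branch>` form, and the merge branch always matches the preview branch. A sketch of the same naming rule in Python (the function name is invented; the branch formats are taken from the job's script):

```python
def compute_preview_branches(head_repo: str, base_repo: str, branch: str, fork_owner: str) -> dict[str, str]:
    """Reproduce the compute-branch job's outputs."""
    if head_repo != base_repo:
        # Fork PR: prefix with fork owner for isolation
        if not fork_owner:
            raise ValueError("Fork PR detected but fork owner is empty")
        preview = f"preview/{fork_owner}/{branch}"
        base = f"preview/base/{fork_owner}/{branch}"
    else:
        # Same-repo PR
        preview = f"preview/{branch}"
        base = f"preview/base/{branch}"
    # merge_branch reuses the preview branch name
    return {"preview_branch": preview, "base_branch": base, "merge_branch": preview}
```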

.gitignore

@@ -35,3 +35,5 @@ docs/static/imported-files/
docs/docs/api-deprecated/
docs/docs/api-experimental/
docs/docs/api/
tests/integration/client-typescript/node_modules/
.ts-client-checkout/


@@ -9862,9 +9862,21 @@ components:
title: Object
default: vector_store.file
attributes:
-additionalProperties: true
+additionalProperties:
anyOf:
- type: string
maxLength: 512
- type: number
- type: boolean
title: string | number | boolean
propertyNames:
type: string
maxLength: 64
type: object
maxProperties: 16
title: Attributes
description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers.
x-oaiTypeLabel: map
chunking_strategy:
oneOf:
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
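The tightened `attributes` schema mirrors the constraint described above: at most 16 key-value pairs, keys are strings of at most 64 characters, and values are strings of at most 512 characters, numbers, or booleans. A minimal validator sketch of those constraints (a hypothetical helper, not part of the spec or the server code):

```python
def validate_attributes(attributes: dict) -> None:
    """Check a vector_store.file attributes map against the schema limits."""
    if len(attributes) > 16:
        raise ValueError("attributes may contain at most 16 key-value pairs")
    for key, value in attributes.items():
        if not isinstance(key, str) or len(key) > 64:
            raise ValueError(f"key {key!r} must be a string of at most 64 characters")
        if isinstance(value, (bool, int, float)):
            continue  # numbers and booleans are allowed as-is
        if isinstance(value, str) and len(value) <= 512:
            continue  # strings are capped at 512 characters
        raise ValueError(f"value for {key!r} must be a string (<=512 chars), number, or boolean")
```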


@@ -24,7 +24,7 @@ https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `api_base` | `HttpUrl` | No | | Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com) |
+| `base_url` | `HttpUrl \| None` | No | | Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com/openai/v1) |
| `api_version` | `str \| None` | No | | Azure API version for Azure (e.g., 2024-12-01-preview) |
| `api_type` | `str \| None` | No | azure | Azure API type for Azure (e.g., azure) |
@@ -32,7 +32,7 @@ https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview
```yaml
api_key: ${env.AZURE_API_KEY:=}
-api_base: ${env.AZURE_API_BASE:=}
+base_url: ${env.AZURE_API_BASE:=}
api_version: ${env.AZURE_API_VERSION:=}
api_type: ${env.AZURE_API_TYPE:=}
```


@@ -17,11 +17,11 @@ Cerebras inference provider for running models on Cerebras Cloud platform.
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `base_url` | `str` | No | https://api.cerebras.ai | Base URL for the Cerebras API |
+| `base_url` | `HttpUrl \| None` | No | https://api.cerebras.ai/v1 | Base URL for the Cerebras API |
## Sample Configuration
```yaml
-base_url: https://api.cerebras.ai
+base_url: https://api.cerebras.ai/v1
api_key: ${env.CEREBRAS_API_KEY:=}
```


@@ -17,11 +17,11 @@ Databricks inference provider for running models on Databricks' unified analytic
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
| `api_token` | `SecretStr \| None` | No | | The Databricks API token |
-| `url` | `str \| None` | No | | The URL for the Databricks model serving endpoint |
+| `base_url` | `HttpUrl \| None` | No | | The URL for the Databricks model serving endpoint (should include /serving-endpoints path) |
## Sample Configuration
```yaml
-url: ${env.DATABRICKS_HOST:=}
+base_url: ${env.DATABRICKS_HOST:=}
api_token: ${env.DATABRICKS_TOKEN:=}
```


@@ -17,11 +17,11 @@ Fireworks AI inference provider for Llama models and other AI models on the Fire
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `url` | `str` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks server |
+| `base_url` | `HttpUrl \| None` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks server |
## Sample Configuration
```yaml
-url: https://api.fireworks.ai/inference/v1
+base_url: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY:=}
```


@@ -17,11 +17,11 @@ Groq inference provider for ultra-fast inference using Groq's LPU technology.
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `url` | `str` | No | https://api.groq.com | The URL for the Groq AI server |
+| `base_url` | `HttpUrl \| None` | No | https://api.groq.com/openai/v1 | The URL for the Groq AI server |
## Sample Configuration
```yaml
-url: https://api.groq.com
+base_url: https://api.groq.com/openai/v1
api_key: ${env.GROQ_API_KEY:=}
```


@@ -17,11 +17,11 @@ Llama OpenAI-compatible provider for using Llama models with OpenAI API format.
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `openai_compat_api_base` | `str` | No | https://api.llama.com/compat/v1/ | The URL for the Llama API server |
+| `base_url` | `HttpUrl \| None` | No | https://api.llama.com/compat/v1/ | The URL for the Llama API server |
## Sample Configuration
```yaml
-openai_compat_api_base: https://api.llama.com/compat/v1/
+base_url: https://api.llama.com/compat/v1/
api_key: ${env.LLAMA_API_KEY}
```


@@ -17,15 +17,13 @@ NVIDIA inference provider for accessing NVIDIA NIM models and AI services.
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `url` | `str` | No | https://integrate.api.nvidia.com | A base url for accessing the NVIDIA NIM |
+| `base_url` | `HttpUrl \| None` | No | https://integrate.api.nvidia.com/v1 | A base url for accessing the NVIDIA NIM |
| `timeout` | `int` | No | 60 | Timeout for the HTTP requests |
| `append_api_version` | `bool` | No | True | When set to false, the API version will not be appended to the base_url. By default, it is true. |
| `rerank_model_to_url` | `dict[str, str]` | No | `{'nv-rerank-qa-mistral-4b:1': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking', 'nvidia/nv-rerankqa-mistral-4b-v3': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking', 'nvidia/llama-3.2-nv-rerankqa-1b-v2': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking'}` | Mapping of rerank model identifiers to their API endpoints. |
## Sample Configuration
```yaml
-url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
api_key: ${env.NVIDIA_API_KEY:=}
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
```


@@ -16,10 +16,10 @@ Ollama inference provider for running local models through the Ollama runtime.
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
-| `url` | `str` | No | http://localhost:11434 | |
+| `base_url` | `HttpUrl \| None` | No | http://localhost:11434/v1 | |
## Sample Configuration
```yaml
-url: ${env.OLLAMA_URL:=http://localhost:11434}
+base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
```


@@ -17,7 +17,7 @@ OpenAI inference provider for accessing GPT models and other OpenAI services.
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `base_url` | `str` | No | https://api.openai.com/v1 | Base URL for OpenAI API |
+| `base_url` | `HttpUrl \| None` | No | https://api.openai.com/v1 | Base URL for OpenAI API |
## Sample Configuration


@@ -17,11 +17,11 @@ Passthrough inference provider for connecting to any external inference service
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `url` | `str` | No | | The URL for the passthrough endpoint |
+| `base_url` | `HttpUrl \| None` | No | | The URL for the passthrough endpoint |
## Sample Configuration
```yaml
-url: ${env.PASSTHROUGH_URL}
+base_url: ${env.PASSTHROUGH_URL}
api_key: ${env.PASSTHROUGH_API_KEY}
```


@@ -17,11 +17,11 @@ RunPod inference provider for running models on RunPod's cloud GPU platform.
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
| `api_token` | `SecretStr \| None` | No | | The API token |
-| `url` | `str \| None` | No | | The URL for the Runpod model serving endpoint |
+| `base_url` | `HttpUrl \| None` | No | | The URL for the Runpod model serving endpoint |
## Sample Configuration
```yaml
-url: ${env.RUNPOD_URL:=}
+base_url: ${env.RUNPOD_URL:=}
api_token: ${env.RUNPOD_API_TOKEN}
```


@@ -17,11 +17,11 @@ SambaNova inference provider for running models on SambaNova's dataflow architec
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `url` | `str` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server |
+| `base_url` | `HttpUrl \| None` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server |
## Sample Configuration
```yaml
-url: https://api.sambanova.ai/v1
+base_url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY:=}
```


@@ -16,10 +16,10 @@ Text Generation Inference (TGI) provider for HuggingFace model serving.
|-------|------|----------|---------|-------------|
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
-| `url` | `str` | No | | The URL for the TGI serving endpoint |
+| `base_url` | `HttpUrl \| None` | No | | The URL for the TGI serving endpoint (should include /v1 path) |
## Sample Configuration
```yaml
-url: ${env.TGI_URL:=}
+base_url: ${env.TGI_URL:=}
```


@@ -17,11 +17,11 @@ Together AI inference provider for open-source models and collaborative AI devel
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `url` | `str` | No | https://api.together.xyz/v1 | The URL for the Together AI server |
+| `base_url` | `HttpUrl \| None` | No | https://api.together.xyz/v1 | The URL for the Together AI server |
## Sample Configuration
```yaml
-url: https://api.together.xyz/v1
+base_url: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY:=}
```


@@ -17,14 +17,14 @@ Remote vLLM inference provider for connecting to vLLM servers.
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
| `api_token` | `SecretStr \| None` | No | | The API token |
-| `url` | `str \| None` | No | | The URL for the vLLM model serving endpoint |
+| `base_url` | `HttpUrl \| None` | No | | The URL for the vLLM model serving endpoint |
| `max_tokens` | `int` | No | 4096 | Maximum number of tokens to generate. |
| `tls_verify` | `bool \| str` | No | True | Whether to verify TLS certificates. Can be a boolean or a path to a CA certificate file. |
## Sample Configuration
```yaml
-url: ${env.VLLM_URL:=}
+base_url: ${env.VLLM_URL:=}
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:=true}


@@ -17,14 +17,14 @@ IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform
| `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
| `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
| `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `url` | `str` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing the watsonx.ai |
+| `base_url` | `HttpUrl \| None` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing the watsonx.ai |
| `project_id` | `str \| None` | No | | The watsonx.ai project ID |
| `timeout` | `int` | No | 60 | Timeout for the HTTP requests |
## Sample Configuration
```yaml
-url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
+base_url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
api_key: ${env.WATSONX_API_KEY:=}
project_id: ${env.WATSONX_PROJECT_ID:=}
```
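The same rename runs through all of the provider docs above: the legacy `url`, `api_base`, and `openai_compat_api_base` fields are consolidated into a single `base_url` (typed `HttpUrl | None`), and OpenAI-compatible defaults now carry their path suffix (`/v1`, `/openai/v1`, and so on). A hedged sketch of what migrating an old config mapping could look like; the helper name and key list are assumptions, not a shipped utility:

```python
LEGACY_URL_KEYS = ("url", "api_base", "openai_compat_api_base")


def migrate_provider_config(config: dict) -> dict:
    """Rename legacy URL fields to base_url, leaving everything else untouched."""
    migrated = dict(config)
    for key in LEGACY_URL_KEYS:
        if key in migrated and "base_url" not in migrated:
            migrated["base_url"] = migrated.pop(key)
    return migrated


# Example: {"url": "http://localhost:11434"} -> {"base_url": "http://localhost:11434"};
# note the new defaults also append the API path (e.g. /v1) to the URL.
```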

docs/package-lock.json

@@ -10712,12 +10712,6 @@
"integrity": "sha512-QMUezzXWII9EV5aTFXW1UBVUO77wYPpjqIF8/AviUCThNeSYZykpoTixUeaNNBwmCev0AMDWMAni+f8Hxb1IFw==",
"license": "Unlicense"
},
"node_modules/fs.realpath": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz",
"integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==",
"license": "ISC"
},
"node_modules/fsevents": { "node_modules/fsevents": {
"version": "2.3.3", "version": "2.3.3",
"resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",
@@ -10821,21 +10815,20 @@
"license": "ISC"
},
"node_modules/glob": {
-"version": "7.2.3",
-"resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz",
-"integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==",
+"version": "10.5.0",
+"resolved": "https://registry.npmjs.org/glob/-/glob-10.5.0.tgz",
+"integrity": "sha512-DfXN8DfhJ7NH3Oe7cFmu3NCu1wKbkReJ8TorzSAFbSKrlNaQSKfIzqYqVY8zlbs2NLBbWpRiU52GX2PbaBVNkg==",
"deprecated": "Glob versions prior to v9 are no longer supported",
"license": "ISC", "license": "ISC",
"dependencies": { "dependencies": {
"fs.realpath": "^1.0.0", "foreground-child": "^3.1.0",
"inflight": "^1.0.4", "jackspeak": "^3.1.2",
"inherits": "2", "minimatch": "^9.0.4",
"minimatch": "^3.1.1", "minipass": "^7.1.2",
"once": "^1.3.0", "package-json-from-dist": "^1.0.0",
"path-is-absolute": "^1.0.0" "path-scurry": "^1.11.1"
}, },
"engines": { "bin": {
"node": "*" "glob": "dist/esm/bin.mjs"
}, },
"funding": { "funding": {
"url": "https://github.com/sponsors/isaacs" "url": "https://github.com/sponsors/isaacs"
@@ -10859,26 +10852,19 @@
"integrity": "sha512-lkX1HJXwyMcprw/5YUZc2s7DrpAiHB21/V+E1rHUrVNokkvB6bqMzT0VfV6/86ZNabt1k14YOIaT7nDvOX3Iiw==",
"license": "BSD-2-Clause"
},
"node_modules/glob/node_modules/brace-expansion": {
"version": "1.1.12",
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
"integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
"license": "MIT",
"dependencies": {
"balanced-match": "^1.0.0",
"concat-map": "0.0.1"
}
},
"node_modules/glob/node_modules/minimatch": { "node_modules/glob/node_modules/minimatch": {
"version": "3.1.2", "version": "9.0.5",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz",
"integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", "integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==",
"license": "ISC", "license": "ISC",
"dependencies": { "dependencies": {
"brace-expansion": "^1.1.7" "brace-expansion": "^2.0.1"
}, },
"engines": { "engines": {
"node": "*" "node": ">=16 || 14 >=14.17"
},
"funding": {
"url": "https://github.com/sponsors/isaacs"
} }
},
"node_modules/global-dirs": {
@@ -11792,17 +11778,6 @@
"node": ">=12"
}
},
"node_modules/inflight": {
"version": "1.0.6",
"resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz",
"integrity": "sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==",
"deprecated": "This module is not supported, and leaks memory. Do not use it. Check out lru-cache if you want a good and tested way to coalesce async requests by a key value, which is much more comprehensive and powerful.",
"license": "ISC",
"dependencies": {
"once": "^1.3.0",
"wrappy": "1"
}
},
"node_modules/inherits": { "node_modules/inherits": {
"version": "2.0.4", "version": "2.0.4",
"resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz", "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",
@@ -15570,15 +15545,6 @@
"node": ">= 0.8"
}
},
"node_modules/once": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz",
"integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==",
"license": "ISC",
"dependencies": {
"wrappy": "1"
}
},
"node_modules/onetime": { "node_modules/onetime": {
"version": "5.1.2", "version": "5.1.2",
"resolved": "https://registry.npmjs.org/onetime/-/onetime-5.1.2.tgz", "resolved": "https://registry.npmjs.org/onetime/-/onetime-5.1.2.tgz",
@@ -15955,15 +15921,6 @@
"node": "^12.20.0 || ^14.13.1 || >=16.0.0"
}
},
"node_modules/path-is-absolute": {
"version": "1.0.1",
"resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz",
"integrity": "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==",
"license": "MIT",
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/path-is-inside": { "node_modules/path-is-inside": {
"version": "1.0.2", "version": "1.0.2",
"resolved": "https://registry.npmjs.org/path-is-inside/-/path-is-inside-1.0.2.tgz", "resolved": "https://registry.npmjs.org/path-is-inside/-/path-is-inside-1.0.2.tgz",
@@ -20038,41 +19995,6 @@
"node": ">= 6"
}
},
"node_modules/sucrase/node_modules/glob": {
"version": "10.4.5",
"resolved": "https://registry.npmjs.org/glob/-/glob-10.4.5.tgz",
"integrity": "sha512-7Bv8RF0k6xjo7d4A/PxYLbUCfb6c+Vpd2/mB2yRDlew7Jb5hEXiCD9ibfO7wpk8i4sevK6DFny9h7EYbM3/sHg==",
"license": "ISC",
"dependencies": {
"foreground-child": "^3.1.0",
"jackspeak": "^3.1.2",
"minimatch": "^9.0.4",
"minipass": "^7.1.2",
"package-json-from-dist": "^1.0.0",
"path-scurry": "^1.11.1"
},
"bin": {
"glob": "dist/esm/bin.mjs"
},
"funding": {
"url": "https://github.com/sponsors/isaacs"
}
},
"node_modules/sucrase/node_modules/minimatch": {
"version": "9.0.5",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz",
"integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==",
"license": "ISC",
"dependencies": {
"brace-expansion": "^2.0.1"
},
"engines": {
"node": ">=16 || 14 >=14.17"
},
"funding": {
"url": "https://github.com/sponsors/isaacs"
}
},
"node_modules/supports-color": { "node_modules/supports-color": {
"version": "7.2.0", "version": "7.2.0",
"resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz", "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz",
@@ -21620,12 +21542,6 @@
"url": "https://github.com/chalk/strip-ansi?sponsor=1"
}
},
"node_modules/wrappy": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz",
"integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==",
"license": "ISC"
},
"node_modules/write-file-atomic": { "node_modules/write-file-atomic": {
"version": "3.0.3", "version": "3.0.3",
"resolved": "https://registry.npmjs.org/write-file-atomic/-/write-file-atomic-3.0.3.tgz", "resolved": "https://registry.npmjs.org/write-file-atomic/-/write-file-atomic-3.0.3.tgz",


@@ -31,6 +31,9 @@
"react-dom": "^19.0.0",
"remark-code-import": "^1.2.0"
},
"overrides": {
"glob": "^10.5.0"
},
"browserslist": { "browserslist": {
"production": [ "production": [
">0.5%", ">0.5%",


@@ -6705,9 +6705,21 @@ components:
title: Object
default: vector_store.file
attributes:
-additionalProperties: true
+additionalProperties:
anyOf:
- type: string
maxLength: 512
- type: number
- type: boolean
title: string | number | boolean
propertyNames:
type: string
maxLength: 64
type: object
maxProperties: 16
title: Attributes
description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers.
x-oaiTypeLabel: map
chunking_strategy:
oneOf:
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'


@@ -6061,9 +6061,21 @@ components:
title: Object
default: vector_store.file
attributes:
-additionalProperties: true
+additionalProperties:
anyOf:
- type: string
maxLength: 512
- type: number
- type: boolean
title: string | number | boolean
propertyNames:
type: string
maxLength: 64
type: object
maxProperties: 16
title: Attributes
description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers.
x-oaiTypeLabel: map
chunking_strategy:
oneOf:
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'


@@ -8883,9 +8883,21 @@ components:
title: Object
default: vector_store.file
attributes:
-additionalProperties: true
+additionalProperties:
anyOf:
- type: string
maxLength: 512
- type: number
- type: boolean
title: string | number | boolean
propertyNames:
type: string
maxLength: 64
type: object
maxProperties: 16
title: Attributes
description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers.
x-oaiTypeLabel: map
chunking_strategy:
oneOf:
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'


@@ -9862,9 +9862,21 @@ components:
title: Object
default: vector_store.file
attributes:
-additionalProperties: true
+additionalProperties:
anyOf:
- type: string
maxLength: 512
- type: number
- type: boolean
title: string | number | boolean
propertyNames:
type: string
maxLength: 64
type: object
maxProperties: 16
title: Attributes
description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers.
x-oaiTypeLabel: map
chunking_strategy:
oneOf:
- $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'


@@ -287,9 +287,9 @@ start_container() {
# On macOS/Windows, use host.docker.internal to reach host from container
# On Linux with --network host, use localhost
if [[ "$(uname)" == "Darwin" ]] || [[ "$(uname)" == *"MINGW"* ]]; then
-OLLAMA_URL="${OLLAMA_URL:-http://host.docker.internal:11434}"
+OLLAMA_URL="${OLLAMA_URL:-http://host.docker.internal:11434/v1}"
else
-OLLAMA_URL="${OLLAMA_URL:-http://localhost:11434}"
+OLLAMA_URL="${OLLAMA_URL:-http://localhost:11434/v1}"
fi
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OLLAMA_URL=$OLLAMA_URL"


@@ -16,16 +16,16 @@ import sys
from tests.integration.suites import SETUP_DEFINITIONS, SUITE_DEFINITIONS
-def get_setup_env_vars(setup_name, suite_name=None):
+def get_setup_config(setup_name, suite_name=None):
"""
-Get environment variables for a setup, with optional suite default fallback.
+Get full configuration (env vars + defaults) for a setup.
Args:
setup_name: Name of the setup (e.g., 'ollama', 'gpt')
suite_name: Optional suite name to get default setup if setup_name is None
Returns:
-Dictionary of environment variables
+Dictionary with 'env' and 'defaults' keys
"""
# If no setup specified, try to get default from suite
if not setup_name and suite_name:
@@ -34,7 +34,7 @@ def get_setup_env_vars(setup_name, suite_name=None):
setup_name = suite.default_setup
if not setup_name:
-return {}
+return {"env": {}, "defaults": {}}
setup = SETUP_DEFINITIONS.get(setup_name)
if not setup:
@@ -44,27 +44,31 @@
)
sys.exit(1)
-return setup.env
+return {"env": setup.env, "defaults": setup.defaults}
def main():
-parser = argparse.ArgumentParser(description="Extract environment variables from a test setup")
+parser = argparse.ArgumentParser(description="Extract environment variables and defaults from a test setup")
parser.add_argument("--setup", help="Setup name (e.g., ollama, gpt)")
parser.add_argument("--suite", help="Suite name to get default setup from if --setup not provided")
parser.add_argument("--format", choices=["bash", "json"], default="bash", help="Output format (default: bash)")
args = parser.parse_args()
-env_vars = get_setup_env_vars(args.setup, args.suite)
+config = get_setup_config(args.setup, args.suite)
if args.format == "bash":
-# Output as bash export statements
-for key, value in env_vars.items():
+# Output env vars as bash export statements
+for key, value in config["env"].items():
print(f"export {key}='{value}'")
# Output defaults as bash export statements with LLAMA_STACK_TEST_ prefix
for key, value in config["defaults"].items():
env_key = f"LLAMA_STACK_TEST_{key.upper()}"
print(f"export {env_key}='{value}'")
elif args.format == "json":
import json
-print(json.dumps(env_vars))
+print(json.dumps(config))
if __name__ == "__main__":
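With this change the helper returns both the setup's environment variables and its defaults, and bash mode exports the defaults with a `LLAMA_STACK_TEST_` prefix. A small sketch of the prefixing behavior on its own (the example default key and value are illustrative, not taken from a real setup):

```python
def defaults_to_exports(defaults: dict[str, str]) -> list[str]:
    """Mirror the new bash-mode output for setup defaults."""
    return [
        f"export LLAMA_STACK_TEST_{key.upper()}='{value}'"
        for key, value in defaults.items()
    ]


# e.g. {"text_model": "example-model-id"} -> ["export LLAMA_STACK_TEST_TEXT_MODEL='example-model-id'"]
```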


@@ -640,7 +640,7 @@ cmd=( run -d "${PLATFORM_OPTS[@]}" --name llama-stack \
--network llama-net \
-p "${PORT}:${PORT}" \
"${server_env_opts[@]}" \
--e OLLAMA_URL="http://ollama-server:${OLLAMA_PORT}" \
+-e OLLAMA_URL="http://ollama-server:${OLLAMA_PORT}/v1" \
"${SERVER_IMAGE}" --port "${PORT}")
log "🦙 Starting Llama Stack..."


@@ -20,6 +20,7 @@ TEST_PATTERN=""
INFERENCE_MODE="replay"
EXTRA_PARAMS=""
COLLECT_ONLY=false
TYPESCRIPT_ONLY=false
# Function to display usage
usage() {
@@ -34,6 +35,7 @@ Options:
--subdirs STRING Comma-separated list of test subdirectories to run (overrides suite)
--pattern STRING Regex pattern to pass to pytest -k
--collect-only Collect tests only without running them (skips server startup)
--typescript-only Skip Python tests and run only TypeScript client tests
--help Show this help message
Suites are defined in tests/integration/suites.py and define which tests to run.
@@ -90,6 +92,10 @@ while [[ $# -gt 0 ]]; do
COLLECT_ONLY=true
shift
;;
--typescript-only)
TYPESCRIPT_ONLY=true
shift
;;
--help)
usage
exit 0
@@ -181,6 +187,10 @@ echo "$SETUP_ENV"
eval "$SETUP_ENV"
echo ""
# Export suite and setup names for TypeScript tests
export LLAMA_STACK_TEST_SUITE="$TEST_SUITE"
export LLAMA_STACK_TEST_SETUP="$TEST_SETUP"
ROOT_DIR="$THIS_DIR/.."
cd $ROOT_DIR
@@ -212,6 +222,71 @@ find_available_port() {
return 1
}
run_client_ts_tests() {
if ! command -v npm &>/dev/null; then
echo "npm could not be found; ensure Node.js is installed"
return 1
fi
pushd tests/integration/client-typescript >/dev/null
# Determine if TS_CLIENT_PATH is a directory path or an npm version
if [[ -d "$TS_CLIENT_PATH" ]]; then
# It's a directory path - use local checkout
if [[ ! -f "$TS_CLIENT_PATH/package.json" ]]; then
echo "Error: $TS_CLIENT_PATH exists but doesn't look like llama-stack-client-typescript (no package.json)"
popd >/dev/null
return 1
fi
echo "Using local llama-stack-client-typescript from: $TS_CLIENT_PATH"
# Build the TypeScript client first
echo "Building TypeScript client..."
pushd "$TS_CLIENT_PATH" >/dev/null
npm install --silent
npm run build --silent
popd >/dev/null
# Install other dependencies first
if [[ "${CI:-}" == "true" || "${CI:-}" == "1" ]]; then
npm ci --silent
else
npm install --silent
fi
# Then install the client from local directory
echo "Installing llama-stack-client from: $TS_CLIENT_PATH"
npm install "$TS_CLIENT_PATH" --silent
else
# It's an npm version specifier - install from npm
echo "Installing llama-stack-client@${TS_CLIENT_PATH} from npm"
if [[ "${CI:-}" == "true" || "${CI:-}" == "1" ]]; then
npm ci --silent
npm install "llama-stack-client@${TS_CLIENT_PATH}" --silent
else
npm install "llama-stack-client@${TS_CLIENT_PATH}" --silent
fi
fi
# Verify installation
echo "Verifying llama-stack-client installation..."
if npm list llama-stack-client 2>/dev/null | grep -q llama-stack-client; then
echo "✅ llama-stack-client successfully installed"
npm list llama-stack-client
else
echo "❌ llama-stack-client not found in node_modules"
echo "Installed packages:"
npm list --depth=0
popd >/dev/null
return 1
fi
echo "Running TypeScript tests for suite $TEST_SUITE (setup $TEST_SETUP)"
npm test
popd >/dev/null
}
# Start Llama Stack Server if needed
if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then
# Find an available port for the server
@@ -221,6 +296,7 @@ if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then
exit 1
fi
export LLAMA_STACK_PORT
export TEST_API_BASE_URL="http://localhost:$LLAMA_STACK_PORT"
echo "Will use port: $LLAMA_STACK_PORT" echo "Will use port: $LLAMA_STACK_PORT"
stop_server() { stop_server() {
@@ -298,6 +374,7 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
exit 1
fi
export LLAMA_STACK_PORT
export TEST_API_BASE_URL="http://localhost:$LLAMA_STACK_PORT"
echo "Will use port: $LLAMA_STACK_PORT" echo "Will use port: $LLAMA_STACK_PORT"
echo "=== Building Docker Image for distribution: $DISTRO ===" echo "=== Building Docker Image for distribution: $DISTRO ==="
@@ -473,7 +550,9 @@ if [[ -n "$STACK_CONFIG" ]]; then
STACK_CONFIG_ARG="--stack-config=$STACK_CONFIG"
fi
-pytest -s -v $PYTEST_TARGET \
+# Run Python tests unless typescript-only mode
if [[ "$TYPESCRIPT_ONLY" == "false" ]]; then
pytest -s -v $PYTEST_TARGET \
$STACK_CONFIG_ARG \
--inference-mode="$INFERENCE_MODE" \
-k "$PYTEST_PATTERN" \
@@ -482,7 +561,12 @@ pytest -s -v $PYTEST_TARGET \
--embedding-model=sentence-transformers/nomic-ai/nomic-embed-text-v1.5 \
--color=yes $EXTRA_PARAMS \
--capture=tee-sys
exit_code=$?
else
echo "Skipping Python tests (--typescript-only mode)"
exit_code=0
fi
set +x
set -e
@@ -506,5 +590,10 @@ else
exit 1
fi
# Run TypeScript client tests if TS_CLIENT_PATH is set
if [[ $exit_code -eq 0 && -n "${TS_CLIENT_PATH:-}" && "${LLAMA_STACK_TEST_STACK_CONFIG_TYPE:-}" == "server" ]]; then
run_client_ts_tests
fi
echo "" echo ""
echo "=== Integration Tests Complete ===" echo "=== Integration Tests Complete ==="


@@ -17,32 +17,32 @@ providers:
- provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
provider_type: remote::cerebras
config:
-base_url: https://api.cerebras.ai
+base_url: https://api.cerebras.ai/v1
api_key: ${env.CEREBRAS_API_KEY:=}
- provider_id: ${env.OLLAMA_URL:+ollama}
provider_type: remote::ollama
config:
-url: ${env.OLLAMA_URL:=http://localhost:11434}
+base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
- provider_id: ${env.VLLM_URL:+vllm}
provider_type: remote::vllm
config:
-url: ${env.VLLM_URL:=}
+base_url: ${env.VLLM_URL:=}
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
- provider_id: ${env.TGI_URL:+tgi}
provider_type: remote::tgi
config:
-url: ${env.TGI_URL:=}
+base_url: ${env.TGI_URL:=}
- provider_id: fireworks
provider_type: remote::fireworks
config:
-url: https://api.fireworks.ai/inference/v1
+base_url: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY:=}
- provider_id: together
provider_type: remote::together
config:
-url: https://api.together.xyz/v1
+base_url: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY:=}
- provider_id: bedrock
provider_type: remote::bedrock
@@ -52,9 +52,8 @@ providers:
- provider_id: ${env.NVIDIA_API_KEY:+nvidia}
provider_type: remote::nvidia
config:
-url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
api_key: ${env.NVIDIA_API_KEY:=}
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
- provider_id: openai
provider_type: remote::openai
config:
@@ -76,18 +75,18 @@ providers:
- provider_id: groq
provider_type: remote::groq
config:
-url: https://api.groq.com
+base_url: https://api.groq.com/openai/v1
api_key: ${env.GROQ_API_KEY:=}
- provider_id: sambanova
provider_type: remote::sambanova
config:
-url: https://api.sambanova.ai/v1
+base_url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY:=}
- provider_id: ${env.AZURE_API_KEY:+azure}
provider_type: remote::azure
config:
api_key: ${env.AZURE_API_KEY:=}
-api_base: ${env.AZURE_API_BASE:=}
+base_url: ${env.AZURE_API_BASE:=}
api_version: ${env.AZURE_API_VERSION:=}
api_type: ${env.AZURE_API_TYPE:=}
- provider_id: sentence-transformers


@@ -17,32 +17,32 @@ providers:
- provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
provider_type: remote::cerebras
config:
-base_url: https://api.cerebras.ai
+base_url: https://api.cerebras.ai/v1
api_key: ${env.CEREBRAS_API_KEY:=}
- provider_id: ${env.OLLAMA_URL:+ollama}
provider_type: remote::ollama
config:
-url: ${env.OLLAMA_URL:=http://localhost:11434}
+base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
- provider_id: ${env.VLLM_URL:+vllm}
provider_type: remote::vllm
config:
-url: ${env.VLLM_URL:=}
+base_url: ${env.VLLM_URL:=}
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
- provider_id: ${env.TGI_URL:+tgi}
provider_type: remote::tgi
config:
-url: ${env.TGI_URL:=}
+base_url: ${env.TGI_URL:=}
- provider_id: fireworks
provider_type: remote::fireworks
config:
-url: https://api.fireworks.ai/inference/v1
+base_url: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY:=}
- provider_id: together
provider_type: remote::together
config:
-url: https://api.together.xyz/v1
+base_url: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY:=}
- provider_id: bedrock
provider_type: remote::bedrock
@ -52,9 +52,8 @@ providers:
- provider_id: ${env.NVIDIA_API_KEY:+nvidia} - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
provider_type: remote::nvidia provider_type: remote::nvidia
config: config:
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
api_key: ${env.NVIDIA_API_KEY:=} api_key: ${env.NVIDIA_API_KEY:=}
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
- provider_id: openai - provider_id: openai
provider_type: remote::openai provider_type: remote::openai
config: config:
@ -76,18 +75,18 @@ providers:
- provider_id: groq - provider_id: groq
provider_type: remote::groq provider_type: remote::groq
config: config:
url: https://api.groq.com base_url: https://api.groq.com/openai/v1
api_key: ${env.GROQ_API_KEY:=} api_key: ${env.GROQ_API_KEY:=}
- provider_id: sambanova - provider_id: sambanova
provider_type: remote::sambanova provider_type: remote::sambanova
config: config:
url: https://api.sambanova.ai/v1 base_url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY:=} api_key: ${env.SAMBANOVA_API_KEY:=}
- provider_id: ${env.AZURE_API_KEY:+azure} - provider_id: ${env.AZURE_API_KEY:+azure}
provider_type: remote::azure provider_type: remote::azure
config: config:
api_key: ${env.AZURE_API_KEY:=} api_key: ${env.AZURE_API_KEY:=}
api_base: ${env.AZURE_API_BASE:=} base_url: ${env.AZURE_API_BASE:=}
api_version: ${env.AZURE_API_VERSION:=} api_version: ${env.AZURE_API_VERSION:=}
api_type: ${env.AZURE_API_TYPE:=} api_type: ${env.AZURE_API_TYPE:=}
- provider_id: sentence-transformers - provider_id: sentence-transformers
View file
@ -16,9 +16,8 @@ providers:
- provider_id: nvidia - provider_id: nvidia
provider_type: remote::nvidia provider_type: remote::nvidia
config: config:
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
api_key: ${env.NVIDIA_API_KEY:=} api_key: ${env.NVIDIA_API_KEY:=}
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
- provider_id: nvidia - provider_id: nvidia
provider_type: remote::nvidia provider_type: remote::nvidia
config: config:
View file
@ -16,9 +16,8 @@ providers:
- provider_id: nvidia - provider_id: nvidia
provider_type: remote::nvidia provider_type: remote::nvidia
config: config:
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
api_key: ${env.NVIDIA_API_KEY:=} api_key: ${env.NVIDIA_API_KEY:=}
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
vector_io: vector_io:
- provider_id: faiss - provider_id: faiss
provider_type: inline::faiss provider_type: inline::faiss
View file
@ -27,12 +27,12 @@ providers:
- provider_id: groq - provider_id: groq
provider_type: remote::groq provider_type: remote::groq
config: config:
url: https://api.groq.com base_url: https://api.groq.com/openai/v1
api_key: ${env.GROQ_API_KEY:=} api_key: ${env.GROQ_API_KEY:=}
- provider_id: together - provider_id: together
provider_type: remote::together provider_type: remote::together
config: config:
url: https://api.together.xyz/v1 base_url: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY:=} api_key: ${env.TOGETHER_API_KEY:=}
vector_io: vector_io:
- provider_id: sqlite-vec - provider_id: sqlite-vec
View file
@ -11,7 +11,7 @@ providers:
- provider_id: vllm-inference - provider_id: vllm-inference
provider_type: remote::vllm provider_type: remote::vllm
config: config:
url: ${env.VLLM_URL:=http://localhost:8000/v1} base_url: ${env.VLLM_URL:=}
max_tokens: ${env.VLLM_MAX_TOKENS:=4096} max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:=fake} api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:=true} tls_verify: ${env.VLLM_TLS_VERIFY:=true}
View file
@ -17,32 +17,32 @@ providers:
- provider_id: ${env.CEREBRAS_API_KEY:+cerebras} - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
provider_type: remote::cerebras provider_type: remote::cerebras
config: config:
base_url: https://api.cerebras.ai base_url: https://api.cerebras.ai/v1
api_key: ${env.CEREBRAS_API_KEY:=} api_key: ${env.CEREBRAS_API_KEY:=}
- provider_id: ${env.OLLAMA_URL:+ollama} - provider_id: ${env.OLLAMA_URL:+ollama}
provider_type: remote::ollama provider_type: remote::ollama
config: config:
url: ${env.OLLAMA_URL:=http://localhost:11434} base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
- provider_id: ${env.VLLM_URL:+vllm} - provider_id: ${env.VLLM_URL:+vllm}
provider_type: remote::vllm provider_type: remote::vllm
config: config:
url: ${env.VLLM_URL:=} base_url: ${env.VLLM_URL:=}
max_tokens: ${env.VLLM_MAX_TOKENS:=4096} max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:=fake} api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:=true} tls_verify: ${env.VLLM_TLS_VERIFY:=true}
- provider_id: ${env.TGI_URL:+tgi} - provider_id: ${env.TGI_URL:+tgi}
provider_type: remote::tgi provider_type: remote::tgi
config: config:
url: ${env.TGI_URL:=} base_url: ${env.TGI_URL:=}
- provider_id: fireworks - provider_id: fireworks
provider_type: remote::fireworks provider_type: remote::fireworks
config: config:
url: https://api.fireworks.ai/inference/v1 base_url: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY:=} api_key: ${env.FIREWORKS_API_KEY:=}
- provider_id: together - provider_id: together
provider_type: remote::together provider_type: remote::together
config: config:
url: https://api.together.xyz/v1 base_url: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY:=} api_key: ${env.TOGETHER_API_KEY:=}
- provider_id: bedrock - provider_id: bedrock
provider_type: remote::bedrock provider_type: remote::bedrock
@ -52,9 +52,8 @@ providers:
- provider_id: ${env.NVIDIA_API_KEY:+nvidia} - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
provider_type: remote::nvidia provider_type: remote::nvidia
config: config:
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
api_key: ${env.NVIDIA_API_KEY:=} api_key: ${env.NVIDIA_API_KEY:=}
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
- provider_id: openai - provider_id: openai
provider_type: remote::openai provider_type: remote::openai
config: config:
@ -76,18 +75,18 @@ providers:
- provider_id: groq - provider_id: groq
provider_type: remote::groq provider_type: remote::groq
config: config:
url: https://api.groq.com base_url: https://api.groq.com/openai/v1
api_key: ${env.GROQ_API_KEY:=} api_key: ${env.GROQ_API_KEY:=}
- provider_id: sambanova - provider_id: sambanova
provider_type: remote::sambanova provider_type: remote::sambanova
config: config:
url: https://api.sambanova.ai/v1 base_url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY:=} api_key: ${env.SAMBANOVA_API_KEY:=}
- provider_id: ${env.AZURE_API_KEY:+azure} - provider_id: ${env.AZURE_API_KEY:+azure}
provider_type: remote::azure provider_type: remote::azure
config: config:
api_key: ${env.AZURE_API_KEY:=} api_key: ${env.AZURE_API_KEY:=}
api_base: ${env.AZURE_API_BASE:=} base_url: ${env.AZURE_API_BASE:=}
api_version: ${env.AZURE_API_VERSION:=} api_version: ${env.AZURE_API_VERSION:=}
api_type: ${env.AZURE_API_TYPE:=} api_type: ${env.AZURE_API_TYPE:=}
- provider_id: sentence-transformers - provider_id: sentence-transformers
View file
@ -17,32 +17,32 @@ providers:
- provider_id: ${env.CEREBRAS_API_KEY:+cerebras} - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
provider_type: remote::cerebras provider_type: remote::cerebras
config: config:
base_url: https://api.cerebras.ai base_url: https://api.cerebras.ai/v1
api_key: ${env.CEREBRAS_API_KEY:=} api_key: ${env.CEREBRAS_API_KEY:=}
- provider_id: ${env.OLLAMA_URL:+ollama} - provider_id: ${env.OLLAMA_URL:+ollama}
provider_type: remote::ollama provider_type: remote::ollama
config: config:
url: ${env.OLLAMA_URL:=http://localhost:11434} base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
- provider_id: ${env.VLLM_URL:+vllm} - provider_id: ${env.VLLM_URL:+vllm}
provider_type: remote::vllm provider_type: remote::vllm
config: config:
url: ${env.VLLM_URL:=} base_url: ${env.VLLM_URL:=}
max_tokens: ${env.VLLM_MAX_TOKENS:=4096} max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:=fake} api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:=true} tls_verify: ${env.VLLM_TLS_VERIFY:=true}
- provider_id: ${env.TGI_URL:+tgi} - provider_id: ${env.TGI_URL:+tgi}
provider_type: remote::tgi provider_type: remote::tgi
config: config:
url: ${env.TGI_URL:=} base_url: ${env.TGI_URL:=}
- provider_id: fireworks - provider_id: fireworks
provider_type: remote::fireworks provider_type: remote::fireworks
config: config:
url: https://api.fireworks.ai/inference/v1 base_url: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY:=} api_key: ${env.FIREWORKS_API_KEY:=}
- provider_id: together - provider_id: together
provider_type: remote::together provider_type: remote::together
config: config:
url: https://api.together.xyz/v1 base_url: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY:=} api_key: ${env.TOGETHER_API_KEY:=}
- provider_id: bedrock - provider_id: bedrock
provider_type: remote::bedrock provider_type: remote::bedrock
@ -52,9 +52,8 @@ providers:
- provider_id: ${env.NVIDIA_API_KEY:+nvidia} - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
provider_type: remote::nvidia provider_type: remote::nvidia
config: config:
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
api_key: ${env.NVIDIA_API_KEY:=} api_key: ${env.NVIDIA_API_KEY:=}
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
- provider_id: openai - provider_id: openai
provider_type: remote::openai provider_type: remote::openai
config: config:
@ -76,18 +75,18 @@ providers:
- provider_id: groq - provider_id: groq
provider_type: remote::groq provider_type: remote::groq
config: config:
url: https://api.groq.com base_url: https://api.groq.com/openai/v1
api_key: ${env.GROQ_API_KEY:=} api_key: ${env.GROQ_API_KEY:=}
- provider_id: sambanova - provider_id: sambanova
provider_type: remote::sambanova provider_type: remote::sambanova
config: config:
url: https://api.sambanova.ai/v1 base_url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY:=} api_key: ${env.SAMBANOVA_API_KEY:=}
- provider_id: ${env.AZURE_API_KEY:+azure} - provider_id: ${env.AZURE_API_KEY:+azure}
provider_type: remote::azure provider_type: remote::azure
config: config:
api_key: ${env.AZURE_API_KEY:=} api_key: ${env.AZURE_API_KEY:=}
api_base: ${env.AZURE_API_BASE:=} base_url: ${env.AZURE_API_BASE:=}
api_version: ${env.AZURE_API_VERSION:=} api_version: ${env.AZURE_API_VERSION:=}
api_type: ${env.AZURE_API_TYPE:=} api_type: ${env.AZURE_API_TYPE:=}
- provider_id: sentence-transformers - provider_id: sentence-transformers
View file
@ -17,32 +17,32 @@ providers:
- provider_id: ${env.CEREBRAS_API_KEY:+cerebras} - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
provider_type: remote::cerebras provider_type: remote::cerebras
config: config:
base_url: https://api.cerebras.ai base_url: https://api.cerebras.ai/v1
api_key: ${env.CEREBRAS_API_KEY:=} api_key: ${env.CEREBRAS_API_KEY:=}
- provider_id: ${env.OLLAMA_URL:+ollama} - provider_id: ${env.OLLAMA_URL:+ollama}
provider_type: remote::ollama provider_type: remote::ollama
config: config:
url: ${env.OLLAMA_URL:=http://localhost:11434} base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
- provider_id: ${env.VLLM_URL:+vllm} - provider_id: ${env.VLLM_URL:+vllm}
provider_type: remote::vllm provider_type: remote::vllm
config: config:
url: ${env.VLLM_URL:=} base_url: ${env.VLLM_URL:=}
max_tokens: ${env.VLLM_MAX_TOKENS:=4096} max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:=fake} api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:=true} tls_verify: ${env.VLLM_TLS_VERIFY:=true}
- provider_id: ${env.TGI_URL:+tgi} - provider_id: ${env.TGI_URL:+tgi}
provider_type: remote::tgi provider_type: remote::tgi
config: config:
url: ${env.TGI_URL:=} base_url: ${env.TGI_URL:=}
- provider_id: fireworks - provider_id: fireworks
provider_type: remote::fireworks provider_type: remote::fireworks
config: config:
url: https://api.fireworks.ai/inference/v1 base_url: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY:=} api_key: ${env.FIREWORKS_API_KEY:=}
- provider_id: together - provider_id: together
provider_type: remote::together provider_type: remote::together
config: config:
url: https://api.together.xyz/v1 base_url: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY:=} api_key: ${env.TOGETHER_API_KEY:=}
- provider_id: bedrock - provider_id: bedrock
provider_type: remote::bedrock provider_type: remote::bedrock
@ -52,9 +52,8 @@ providers:
- provider_id: ${env.NVIDIA_API_KEY:+nvidia} - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
provider_type: remote::nvidia provider_type: remote::nvidia
config: config:
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
api_key: ${env.NVIDIA_API_KEY:=} api_key: ${env.NVIDIA_API_KEY:=}
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
- provider_id: openai - provider_id: openai
provider_type: remote::openai provider_type: remote::openai
config: config:
@ -76,18 +75,18 @@ providers:
- provider_id: groq - provider_id: groq
provider_type: remote::groq provider_type: remote::groq
config: config:
url: https://api.groq.com base_url: https://api.groq.com/openai/v1
api_key: ${env.GROQ_API_KEY:=} api_key: ${env.GROQ_API_KEY:=}
- provider_id: sambanova - provider_id: sambanova
provider_type: remote::sambanova provider_type: remote::sambanova
config: config:
url: https://api.sambanova.ai/v1 base_url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY:=} api_key: ${env.SAMBANOVA_API_KEY:=}
- provider_id: ${env.AZURE_API_KEY:+azure} - provider_id: ${env.AZURE_API_KEY:+azure}
provider_type: remote::azure provider_type: remote::azure
config: config:
api_key: ${env.AZURE_API_KEY:=} api_key: ${env.AZURE_API_KEY:=}
api_base: ${env.AZURE_API_BASE:=} base_url: ${env.AZURE_API_BASE:=}
api_version: ${env.AZURE_API_VERSION:=} api_version: ${env.AZURE_API_VERSION:=}
api_type: ${env.AZURE_API_TYPE:=} api_type: ${env.AZURE_API_TYPE:=}
- provider_id: sentence-transformers - provider_id: sentence-transformers
View file
@ -17,32 +17,32 @@ providers:
- provider_id: ${env.CEREBRAS_API_KEY:+cerebras} - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
provider_type: remote::cerebras provider_type: remote::cerebras
config: config:
base_url: https://api.cerebras.ai base_url: https://api.cerebras.ai/v1
api_key: ${env.CEREBRAS_API_KEY:=} api_key: ${env.CEREBRAS_API_KEY:=}
- provider_id: ${env.OLLAMA_URL:+ollama} - provider_id: ${env.OLLAMA_URL:+ollama}
provider_type: remote::ollama provider_type: remote::ollama
config: config:
url: ${env.OLLAMA_URL:=http://localhost:11434} base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
- provider_id: ${env.VLLM_URL:+vllm} - provider_id: ${env.VLLM_URL:+vllm}
provider_type: remote::vllm provider_type: remote::vllm
config: config:
url: ${env.VLLM_URL:=} base_url: ${env.VLLM_URL:=}
max_tokens: ${env.VLLM_MAX_TOKENS:=4096} max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:=fake} api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:=true} tls_verify: ${env.VLLM_TLS_VERIFY:=true}
- provider_id: ${env.TGI_URL:+tgi} - provider_id: ${env.TGI_URL:+tgi}
provider_type: remote::tgi provider_type: remote::tgi
config: config:
url: ${env.TGI_URL:=} base_url: ${env.TGI_URL:=}
- provider_id: fireworks - provider_id: fireworks
provider_type: remote::fireworks provider_type: remote::fireworks
config: config:
url: https://api.fireworks.ai/inference/v1 base_url: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY:=} api_key: ${env.FIREWORKS_API_KEY:=}
- provider_id: together - provider_id: together
provider_type: remote::together provider_type: remote::together
config: config:
url: https://api.together.xyz/v1 base_url: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY:=} api_key: ${env.TOGETHER_API_KEY:=}
- provider_id: bedrock - provider_id: bedrock
provider_type: remote::bedrock provider_type: remote::bedrock
@ -52,9 +52,8 @@ providers:
- provider_id: ${env.NVIDIA_API_KEY:+nvidia} - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
provider_type: remote::nvidia provider_type: remote::nvidia
config: config:
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com} base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
api_key: ${env.NVIDIA_API_KEY:=} api_key: ${env.NVIDIA_API_KEY:=}
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
- provider_id: openai - provider_id: openai
provider_type: remote::openai provider_type: remote::openai
config: config:
@ -76,18 +75,18 @@ providers:
- provider_id: groq - provider_id: groq
provider_type: remote::groq provider_type: remote::groq
config: config:
url: https://api.groq.com base_url: https://api.groq.com/openai/v1
api_key: ${env.GROQ_API_KEY:=} api_key: ${env.GROQ_API_KEY:=}
- provider_id: sambanova - provider_id: sambanova
provider_type: remote::sambanova provider_type: remote::sambanova
config: config:
url: https://api.sambanova.ai/v1 base_url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY:=} api_key: ${env.SAMBANOVA_API_KEY:=}
- provider_id: ${env.AZURE_API_KEY:+azure} - provider_id: ${env.AZURE_API_KEY:+azure}
provider_type: remote::azure provider_type: remote::azure
config: config:
api_key: ${env.AZURE_API_KEY:=} api_key: ${env.AZURE_API_KEY:=}
api_base: ${env.AZURE_API_BASE:=} base_url: ${env.AZURE_API_BASE:=}
api_version: ${env.AZURE_API_VERSION:=} api_version: ${env.AZURE_API_VERSION:=}
api_type: ${env.AZURE_API_TYPE:=} api_type: ${env.AZURE_API_TYPE:=}
- provider_id: sentence-transformers - provider_id: sentence-transformers
View file
@ -15,7 +15,7 @@ providers:
- provider_id: watsonx - provider_id: watsonx
provider_type: remote::watsonx provider_type: remote::watsonx
config: config:
url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com} base_url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
api_key: ${env.WATSONX_API_KEY:=} api_key: ${env.WATSONX_API_KEY:=}
project_id: ${env.WATSONX_PROJECT_ID:=} project_id: ${env.WATSONX_PROJECT_ID:=}
vector_io: vector_io:
View file
@ -23,12 +23,14 @@ async def get_provider_impl(
config, config,
deps[Api.inference], deps[Api.inference],
deps[Api.vector_io], deps[Api.vector_io],
deps[Api.safety], deps.get(Api.safety),
deps[Api.tool_runtime], deps[Api.tool_runtime],
deps[Api.tool_groups], deps[Api.tool_groups],
deps[Api.conversations], deps[Api.conversations],
policy, deps[Api.prompts],
deps[Api.files],
telemetry_enabled, telemetry_enabled,
policy,
) )
await impl.initialize() await impl.initialize()
return impl return impl
View file
@ -12,6 +12,7 @@ from llama_stack.providers.utils.responses.responses_store import ResponsesStore
from llama_stack_api import ( from llama_stack_api import (
Agents, Agents,
Conversations, Conversations,
Files,
Inference, Inference,
ListOpenAIResponseInputItem, ListOpenAIResponseInputItem,
ListOpenAIResponseObject, ListOpenAIResponseObject,
@ -22,6 +23,7 @@ from llama_stack_api import (
OpenAIResponsePrompt, OpenAIResponsePrompt,
OpenAIResponseText, OpenAIResponseText,
Order, Order,
Prompts,
ResponseGuardrail, ResponseGuardrail,
Safety, Safety,
ToolGroups, ToolGroups,
@ -41,10 +43,12 @@ class MetaReferenceAgentsImpl(Agents):
config: MetaReferenceAgentsImplConfig, config: MetaReferenceAgentsImplConfig,
inference_api: Inference, inference_api: Inference,
vector_io_api: VectorIO, vector_io_api: VectorIO,
safety_api: Safety, safety_api: Safety | None,
tool_runtime_api: ToolRuntime, tool_runtime_api: ToolRuntime,
tool_groups_api: ToolGroups, tool_groups_api: ToolGroups,
conversations_api: Conversations, conversations_api: Conversations,
prompts_api: Prompts,
files_api: Files,
policy: list[AccessRule], policy: list[AccessRule],
telemetry_enabled: bool = False, telemetry_enabled: bool = False,
): ):
@ -56,7 +60,8 @@ class MetaReferenceAgentsImpl(Agents):
self.tool_groups_api = tool_groups_api self.tool_groups_api = tool_groups_api
self.conversations_api = conversations_api self.conversations_api = conversations_api
self.telemetry_enabled = telemetry_enabled self.telemetry_enabled = telemetry_enabled
self.prompts_api = prompts_api
self.files_api = files_api
self.in_memory_store = InmemoryKVStoreImpl() self.in_memory_store = InmemoryKVStoreImpl()
self.openai_responses_impl: OpenAIResponsesImpl | None = None self.openai_responses_impl: OpenAIResponsesImpl | None = None
self.policy = policy self.policy = policy
@ -73,6 +78,8 @@ class MetaReferenceAgentsImpl(Agents):
vector_io_api=self.vector_io_api, vector_io_api=self.vector_io_api,
safety_api=self.safety_api, safety_api=self.safety_api,
conversations_api=self.conversations_api, conversations_api=self.conversations_api,
prompts_api=self.prompts_api,
files_api=self.files_api,
) )
async def shutdown(self) -> None: async def shutdown(self) -> None:
View file
@ -4,6 +4,7 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
import re
import time import time
import uuid import uuid
from collections.abc import AsyncIterator from collections.abc import AsyncIterator
@ -18,13 +19,17 @@ from llama_stack.providers.utils.responses.responses_store import (
from llama_stack_api import ( from llama_stack_api import (
ConversationItem, ConversationItem,
Conversations, Conversations,
Files,
Inference, Inference,
InvalidConversationIdError, InvalidConversationIdError,
ListOpenAIResponseInputItem, ListOpenAIResponseInputItem,
ListOpenAIResponseObject, ListOpenAIResponseObject,
OpenAIChatCompletionContentPartParam,
OpenAIDeleteResponseObject, OpenAIDeleteResponseObject,
OpenAIMessageParam, OpenAIMessageParam,
OpenAIResponseInput, OpenAIResponseInput,
OpenAIResponseInputMessageContentFile,
OpenAIResponseInputMessageContentImage,
OpenAIResponseInputMessageContentText, OpenAIResponseInputMessageContentText,
OpenAIResponseInputTool, OpenAIResponseInputTool,
OpenAIResponseMessage, OpenAIResponseMessage,
@ -34,7 +39,9 @@ from llama_stack_api import (
OpenAIResponseText, OpenAIResponseText,
OpenAIResponseTextFormat, OpenAIResponseTextFormat,
OpenAISystemMessageParam, OpenAISystemMessageParam,
OpenAIUserMessageParam,
Order, Order,
Prompts,
ResponseGuardrailSpec, ResponseGuardrailSpec,
Safety, Safety,
ToolGroups, ToolGroups,
@ -46,6 +53,7 @@ from .streaming import StreamingResponseOrchestrator
from .tool_executor import ToolExecutor from .tool_executor import ToolExecutor
from .types import ChatCompletionContext, ToolContext from .types import ChatCompletionContext, ToolContext
from .utils import ( from .utils import (
convert_response_content_to_chat_content,
convert_response_input_to_chat_messages, convert_response_input_to_chat_messages,
convert_response_text_to_chat_response_format, convert_response_text_to_chat_response_format,
extract_guardrail_ids, extract_guardrail_ids,
@ -67,8 +75,10 @@ class OpenAIResponsesImpl:
tool_runtime_api: ToolRuntime, tool_runtime_api: ToolRuntime,
responses_store: ResponsesStore, responses_store: ResponsesStore,
vector_io_api: VectorIO, # VectorIO vector_io_api: VectorIO, # VectorIO
safety_api: Safety, safety_api: Safety | None,
conversations_api: Conversations, conversations_api: Conversations,
prompts_api: Prompts,
files_api: Files,
): ):
self.inference_api = inference_api self.inference_api = inference_api
self.tool_groups_api = tool_groups_api self.tool_groups_api = tool_groups_api
@ -82,6 +92,8 @@ class OpenAIResponsesImpl:
tool_runtime_api=tool_runtime_api, tool_runtime_api=tool_runtime_api,
vector_io_api=vector_io_api, vector_io_api=vector_io_api,
) )
self.prompts_api = prompts_api
self.files_api = files_api
async def _prepend_previous_response( async def _prepend_previous_response(
self, self,
@ -122,11 +134,13 @@ class OpenAIResponsesImpl:
# Use stored messages directly and convert only new input # Use stored messages directly and convert only new input
message_adapter = TypeAdapter(list[OpenAIMessageParam]) message_adapter = TypeAdapter(list[OpenAIMessageParam])
messages = message_adapter.validate_python(previous_response.messages) messages = message_adapter.validate_python(previous_response.messages)
new_messages = await convert_response_input_to_chat_messages(input, previous_messages=messages) new_messages = await convert_response_input_to_chat_messages(
input, previous_messages=messages, files_api=self.files_api
)
messages.extend(new_messages) messages.extend(new_messages)
else: else:
# Backward compatibility: reconstruct from inputs # Backward compatibility: reconstruct from inputs
messages = await convert_response_input_to_chat_messages(all_input) messages = await convert_response_input_to_chat_messages(all_input, files_api=self.files_api)
tool_context.recover_tools_from_previous_response(previous_response) tool_context.recover_tools_from_previous_response(previous_response)
elif conversation is not None: elif conversation is not None:
@ -138,7 +152,7 @@ class OpenAIResponsesImpl:
all_input = input all_input = input
if not conversation_items.data: if not conversation_items.data:
# First turn - just convert the new input # First turn - just convert the new input
messages = await convert_response_input_to_chat_messages(input) messages = await convert_response_input_to_chat_messages(input, files_api=self.files_api)
else: else:
if not stored_messages: if not stored_messages:
all_input = conversation_items.data all_input = conversation_items.data
@ -154,14 +168,82 @@ class OpenAIResponsesImpl:
all_input = input all_input = input
messages = stored_messages or [] messages = stored_messages or []
new_messages = await convert_response_input_to_chat_messages(all_input, previous_messages=messages) new_messages = await convert_response_input_to_chat_messages(
all_input, previous_messages=messages, files_api=self.files_api
)
messages.extend(new_messages) messages.extend(new_messages)
else: else:
all_input = input all_input = input
messages = await convert_response_input_to_chat_messages(all_input) messages = await convert_response_input_to_chat_messages(all_input, files_api=self.files_api)
return all_input, messages, tool_context return all_input, messages, tool_context
async def _prepend_prompt(
self,
messages: list[OpenAIMessageParam],
openai_response_prompt: OpenAIResponsePrompt | None,
) -> None:
"""Prepend prompt template to messages, resolving text/image/file variables.
:param messages: List of OpenAIMessageParam objects
:param openai_response_prompt: (Optional) OpenAIResponsePrompt object with variables
:returns: string of utf-8 characters
"""
if not openai_response_prompt or not openai_response_prompt.id:
return
prompt_version = int(openai_response_prompt.version) if openai_response_prompt.version else None
cur_prompt = await self.prompts_api.get_prompt(openai_response_prompt.id, prompt_version)
if not cur_prompt or not cur_prompt.prompt:
return
cur_prompt_text = cur_prompt.prompt
cur_prompt_variables = cur_prompt.variables
if not openai_response_prompt.variables:
messages.insert(0, OpenAISystemMessageParam(content=cur_prompt_text))
return
# Validate that all provided variables exist in the prompt
for name in openai_response_prompt.variables.keys():
if name not in cur_prompt_variables:
raise ValueError(f"Variable {name} not found in prompt {openai_response_prompt.id}")
# Separate text and media variables
text_substitutions = {}
media_content_parts: list[OpenAIChatCompletionContentPartParam] = []
for name, value in openai_response_prompt.variables.items():
# Text variable found
if isinstance(value, OpenAIResponseInputMessageContentText):
text_substitutions[name] = value.text
# Media variable found
elif isinstance(value, OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile):
converted_parts = await convert_response_content_to_chat_content([value], files_api=self.files_api)
if isinstance(converted_parts, list):
media_content_parts.extend(converted_parts)
# Eg: {{product_photo}} becomes "[Image: product_photo]"
# This gives the model textual context about what media exists in the prompt
var_type = value.type.replace("input_", "").replace("_", " ").title()
text_substitutions[name] = f"[{var_type}: {name}]"
def replace_variable(match: re.Match[str]) -> str:
var_name = match.group(1).strip()
return str(text_substitutions.get(var_name, match.group(0)))
pattern = r"\{\{\s*(\w+)\s*\}\}"
processed_prompt_text = re.sub(pattern, replace_variable, cur_prompt_text)
# Insert system message with resolved text
messages.insert(0, OpenAISystemMessageParam(content=processed_prompt_text))
# If we have media, create a new user message because allows to ingest images and files
if media_content_parts:
messages.append(OpenAIUserMessageParam(content=media_content_parts))
async def get_openai_response( async def get_openai_response(
self, self,
response_id: str, response_id: str,
@ -273,6 +355,14 @@ class OpenAIResponsesImpl:
guardrail_ids = extract_guardrail_ids(guardrails) if guardrails else [] guardrail_ids = extract_guardrail_ids(guardrails) if guardrails else []
# Validate that Safety API is available if guardrails are requested
if guardrail_ids and self.safety_api is None:
raise ValueError(
"Cannot process guardrails: Safety API is not configured.\n\n"
"To use guardrails, ensure the Safety API is configured in your stack, or remove "
"the 'guardrails' parameter from your request."
)
if conversation is not None: if conversation is not None:
if previous_response_id is not None: if previous_response_id is not None:
raise ValueError( raise ValueError(
@ -289,6 +379,7 @@ class OpenAIResponsesImpl:
input=input, input=input,
conversation=conversation, conversation=conversation,
model=model, model=model,
prompt=prompt,
instructions=instructions, instructions=instructions,
previous_response_id=previous_response_id, previous_response_id=previous_response_id,
store=store, store=store,
@ -342,6 +433,7 @@ class OpenAIResponsesImpl:
instructions: str | None = None, instructions: str | None = None,
previous_response_id: str | None = None, previous_response_id: str | None = None,
conversation: str | None = None, conversation: str | None = None,
prompt: OpenAIResponsePrompt | None = None,
store: bool | None = True, store: bool | None = True,
temperature: float | None = None, temperature: float | None = None,
text: OpenAIResponseText | None = None, text: OpenAIResponseText | None = None,
@ -364,6 +456,9 @@ class OpenAIResponsesImpl:
if instructions: if instructions:
messages.insert(0, OpenAISystemMessageParam(content=instructions)) messages.insert(0, OpenAISystemMessageParam(content=instructions))
# Prepend reusable prompt (if provided)
await self._prepend_prompt(messages, prompt)
# Structured outputs # Structured outputs
response_format = await convert_response_text_to_chat_response_format(text) response_format = await convert_response_text_to_chat_response_format(text)
@ -386,6 +481,7 @@ class OpenAIResponsesImpl:
ctx=ctx, ctx=ctx,
response_id=response_id, response_id=response_id,
created_at=created_at, created_at=created_at,
prompt=prompt,
text=text, text=text,
max_infer_iters=max_infer_iters, max_infer_iters=max_infer_iters,
parallel_tool_calls=parallel_tool_calls, parallel_tool_calls=parallel_tool_calls,
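The new _prepend_prompt path above resolves a stored prompt template into a system message: {{name}} placeholders for text variables are substituted directly, while image/file variables become textual markers such as "[Image: product_photo]" and their media parts are appended as a separate user message. A standalone sketch of just the substitution step, using the same pattern as the diff (the prompt text and variables are made-up examples):

import re

# Illustrative inputs, not real prompt-store data.
prompt_text = "You are a support agent for {{product_name}}. Consult {{product_photo}} when asked."
text_substitutions = {
    "product_name": "Llama Stack",
    "product_photo": "[Image: product_photo]",  # media variables become textual markers
}

def replace_variable(match: re.Match[str]) -> str:
    var_name = match.group(1).strip()
    return str(text_substitutions.get(var_name, match.group(0)))

pattern = r"\{\{\s*(\w+)\s*\}\}"
print(re.sub(pattern, replace_variable, prompt_text))
# -> You are a support agent for Llama Stack. Consult [Image: product_photo] when asked.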
View file
@ -66,6 +66,8 @@ from llama_stack_api import (
OpenAIResponseUsage, OpenAIResponseUsage,
OpenAIResponseUsageInputTokensDetails, OpenAIResponseUsageInputTokensDetails,
OpenAIResponseUsageOutputTokensDetails, OpenAIResponseUsageOutputTokensDetails,
OpenAIToolMessageParam,
Safety,
WebSearchToolTypes, WebSearchToolTypes,
) )
@ -111,7 +113,7 @@ class StreamingResponseOrchestrator:
max_infer_iters: int, max_infer_iters: int,
tool_executor, # Will be the tool execution logic from the main class tool_executor, # Will be the tool execution logic from the main class
instructions: str | None, instructions: str | None,
safety_api, safety_api: Safety | None,
guardrail_ids: list[str] | None = None, guardrail_ids: list[str] | None = None,
prompt: OpenAIResponsePrompt | None = None, prompt: OpenAIResponsePrompt | None = None,
parallel_tool_calls: bool | None = None, parallel_tool_calls: bool | None = None,
@ -905,10 +907,16 @@ class StreamingResponseOrchestrator:
"""Coordinate execution of both function and non-function tool calls.""" """Coordinate execution of both function and non-function tool calls."""
# Execute non-function tool calls # Execute non-function tool calls
for tool_call in non_function_tool_calls: for tool_call in non_function_tool_calls:
# Check if total calls made to built-in and mcp tools exceed max_tool_calls # if total calls made to built-in and mcp tools exceed max_tool_calls
# then create a tool response message indicating the call was skipped
if self.max_tool_calls is not None and self.accumulated_builtin_tool_calls >= self.max_tool_calls: if self.max_tool_calls is not None and self.accumulated_builtin_tool_calls >= self.max_tool_calls:
logger.info(f"Ignoring built-in and mcp tool call since reached the limit of {self.max_tool_calls=}.") logger.info(f"Ignoring built-in and mcp tool call since reached the limit of {self.max_tool_calls=}.")
break skipped_call_message = OpenAIToolMessageParam(
content=f"Tool call skipped: maximum tool calls limit ({self.max_tool_calls}) reached.",
tool_call_id=tool_call.id,
)
next_turn_messages.append(skipped_call_message)
continue
# Find the item_id for this tool call # Find the item_id for this tool call
matching_item_id = None matching_item_id = None
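With this change, built-in and MCP tool calls beyond max_tool_calls are no longer silently dropped; each skipped call now gets an explicit tool message so the model sees why no result came back. A self-contained sketch of that capping behavior (the message class below is a stand-in for OpenAIToolMessageParam, and the call ids are invented):

from dataclasses import dataclass

@dataclass
class ToolMessage:                 # stand-in for OpenAIToolMessageParam
    content: str
    tool_call_id: str

def cap_tool_calls(call_ids: list[str], max_tool_calls: int | None) -> list[ToolMessage]:
    executed = 0
    messages: list[ToolMessage] = []
    for call_id in call_ids:
        if max_tool_calls is not None and executed >= max_tool_calls:
            messages.append(ToolMessage(
                content=f"Tool call skipped: maximum tool calls limit ({max_tool_calls}) reached.",
                tool_call_id=call_id,
            ))
            continue
        executed += 1              # the real orchestrator executes the tool call here
    return messages

print(cap_tool_calls(["call_1", "call_2", "call_3"], max_tool_calls=2))
# -> one skip message, for call_3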
View file
@ -5,11 +5,14 @@
# the root directory of this source tree. # the root directory of this source tree.
import asyncio import asyncio
import base64
import mimetypes
import re import re
import uuid import uuid
from collections.abc import Sequence from collections.abc import Sequence
from llama_stack_api import ( from llama_stack_api import (
Files,
OpenAIAssistantMessageParam, OpenAIAssistantMessageParam,
OpenAIChatCompletionContentPartImageParam, OpenAIChatCompletionContentPartImageParam,
OpenAIChatCompletionContentPartParam, OpenAIChatCompletionContentPartParam,
@ -18,6 +21,8 @@ from llama_stack_api import (
OpenAIChatCompletionToolCallFunction, OpenAIChatCompletionToolCallFunction,
OpenAIChoice, OpenAIChoice,
OpenAIDeveloperMessageParam, OpenAIDeveloperMessageParam,
OpenAIFile,
OpenAIFileFile,
OpenAIImageURL, OpenAIImageURL,
OpenAIJSONSchema, OpenAIJSONSchema,
OpenAIMessageParam, OpenAIMessageParam,
@ -29,6 +34,7 @@ from llama_stack_api import (
OpenAIResponseInput, OpenAIResponseInput,
OpenAIResponseInputFunctionToolCallOutput, OpenAIResponseInputFunctionToolCallOutput,
OpenAIResponseInputMessageContent, OpenAIResponseInputMessageContent,
OpenAIResponseInputMessageContentFile,
OpenAIResponseInputMessageContentImage, OpenAIResponseInputMessageContentImage,
OpenAIResponseInputMessageContentText, OpenAIResponseInputMessageContentText,
OpenAIResponseInputTool, OpenAIResponseInputTool,
@ -37,9 +43,11 @@ from llama_stack_api import (
OpenAIResponseMessage, OpenAIResponseMessage,
OpenAIResponseOutputMessageContent, OpenAIResponseOutputMessageContent,
OpenAIResponseOutputMessageContentOutputText, OpenAIResponseOutputMessageContentOutputText,
OpenAIResponseOutputMessageFileSearchToolCall,
OpenAIResponseOutputMessageFunctionToolCall, OpenAIResponseOutputMessageFunctionToolCall,
OpenAIResponseOutputMessageMCPCall, OpenAIResponseOutputMessageMCPCall,
OpenAIResponseOutputMessageMCPListTools, OpenAIResponseOutputMessageMCPListTools,
OpenAIResponseOutputMessageWebSearchToolCall,
OpenAIResponseText, OpenAIResponseText,
OpenAISystemMessageParam, OpenAISystemMessageParam,
OpenAIToolMessageParam, OpenAIToolMessageParam,
@ -49,6 +57,46 @@ from llama_stack_api import (
) )
async def extract_bytes_from_file(file_id: str, files_api: Files) -> bytes:
"""
Extract raw bytes from file using the Files API.
:param file_id: The file identifier (e.g., "file-abc123")
:param files_api: Files API instance
:returns: Raw file content as bytes
:raises: ValueError if file cannot be retrieved
"""
try:
response = await files_api.openai_retrieve_file_content(file_id)
return bytes(response.body)
except Exception as e:
raise ValueError(f"Failed to retrieve file content for file_id '{file_id}': {str(e)}") from e
def generate_base64_ascii_text_from_bytes(raw_bytes: bytes) -> str:
"""
Converts raw binary bytes into a safe ASCII text representation for URLs
:param raw_bytes: the actual bytes that represents file content
:returns: string of utf-8 characters
"""
return base64.b64encode(raw_bytes).decode("utf-8")
def construct_data_url(ascii_text: str, mime_type: str | None) -> str:
"""
Construct data url with decoded data inside
:param ascii_text: ASCII content
:param mime_type: MIME type of file
:returns: data url string (eg. data:image/png,base64,%3Ch1%3EHello%2C%20World%21%3C%2Fh1%3E)
"""
if not mime_type:
mime_type = "application/octet-stream"
return f"data:{mime_type};base64,{ascii_text}"
async def convert_chat_choice_to_response_message( async def convert_chat_choice_to_response_message(
choice: OpenAIChoice, choice: OpenAIChoice,
citation_files: dict[str, str] | None = None, citation_files: dict[str, str] | None = None,
@ -78,11 +126,15 @@ async def convert_chat_choice_to_response_message(
async def convert_response_content_to_chat_content( async def convert_response_content_to_chat_content(
content: str | Sequence[OpenAIResponseInputMessageContent | OpenAIResponseOutputMessageContent], content: str | Sequence[OpenAIResponseInputMessageContent | OpenAIResponseOutputMessageContent],
files_api: Files | None,
) -> str | list[OpenAIChatCompletionContentPartParam]: ) -> str | list[OpenAIChatCompletionContentPartParam]:
""" """
Convert the content parts from an OpenAI Response API request into OpenAI Chat Completion content parts. Convert the content parts from an OpenAI Response API request into OpenAI Chat Completion content parts.
The content schemas of each API look similar, but are not exactly the same. The content schemas of each API look similar, but are not exactly the same.
:param content: The content to convert
:param files_api: Files API for resolving file_id to raw file content (required if content contains files/images)
""" """
if isinstance(content, str): if isinstance(content, str):
return content return content
@ -95,9 +147,68 @@ async def convert_response_content_to_chat_content(
elif isinstance(content_part, OpenAIResponseOutputMessageContentOutputText): elif isinstance(content_part, OpenAIResponseOutputMessageContentOutputText):
converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part.text)) converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part.text))
elif isinstance(content_part, OpenAIResponseInputMessageContentImage): elif isinstance(content_part, OpenAIResponseInputMessageContentImage):
detail = content_part.detail
image_mime_type = None
if content_part.image_url: if content_part.image_url:
image_url = OpenAIImageURL(url=content_part.image_url, detail=content_part.detail) image_url = OpenAIImageURL(url=content_part.image_url, detail=detail)
converted_parts.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url)) converted_parts.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url))
elif content_part.file_id:
if files_api is None:
raise ValueError("file_ids are not supported by this implementation of the Stack")
image_file_response = await files_api.openai_retrieve_file(content_part.file_id)
if image_file_response.filename:
image_mime_type, _ = mimetypes.guess_type(image_file_response.filename)
raw_image_bytes = await extract_bytes_from_file(content_part.file_id, files_api)
ascii_text = generate_base64_ascii_text_from_bytes(raw_image_bytes)
image_data_url = construct_data_url(ascii_text, image_mime_type)
image_url = OpenAIImageURL(url=image_data_url, detail=detail)
converted_parts.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url))
else:
raise ValueError(
f"Image content must have either 'image_url' or 'file_id'. "
f"Got image_url={content_part.image_url}, file_id={content_part.file_id}"
)
elif isinstance(content_part, OpenAIResponseInputMessageContentFile):
resolved_file_data = None
file_data = content_part.file_data
file_id = content_part.file_id
file_url = content_part.file_url
filename = content_part.filename
file_mime_type = None
if not any([file_data, file_id, file_url]):
raise ValueError(
f"File content must have at least one of 'file_data', 'file_id', or 'file_url'. "
f"Got file_data={file_data}, file_id={file_id}, file_url={file_url}"
)
if file_id:
if files_api is None:
raise ValueError("file_ids are not supported by this implementation of the Stack")
file_response = await files_api.openai_retrieve_file(file_id)
if not filename:
filename = file_response.filename
file_mime_type, _ = mimetypes.guess_type(file_response.filename)
raw_file_bytes = await extract_bytes_from_file(file_id, files_api)
ascii_text = generate_base64_ascii_text_from_bytes(raw_file_bytes)
resolved_file_data = construct_data_url(ascii_text, file_mime_type)
elif file_data:
if file_data.startswith("data:"):
resolved_file_data = file_data
else:
# Raw base64 data, wrap in data URL format
if filename:
file_mime_type, _ = mimetypes.guess_type(filename)
resolved_file_data = construct_data_url(file_data, file_mime_type)
elif file_url:
resolved_file_data = file_url
converted_parts.append(
OpenAIFile(
file=OpenAIFileFile(
file_data=resolved_file_data,
filename=filename,
)
)
)
elif isinstance(content_part, str): elif isinstance(content_part, str):
converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part)) converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part))
else: else:
@ -110,12 +221,14 @@ async def convert_response_content_to_chat_content(
async def convert_response_input_to_chat_messages( async def convert_response_input_to_chat_messages(
input: str | list[OpenAIResponseInput], input: str | list[OpenAIResponseInput],
previous_messages: list[OpenAIMessageParam] | None = None, previous_messages: list[OpenAIMessageParam] | None = None,
files_api: Files | None = None,
) -> list[OpenAIMessageParam]: ) -> list[OpenAIMessageParam]:
""" """
Convert the input from an OpenAI Response API request into OpenAI Chat Completion messages. Convert the input from an OpenAI Response API request into OpenAI Chat Completion messages.
:param input: The input to convert :param input: The input to convert
:param previous_messages: Optional previous messages to check for function_call references :param previous_messages: Optional previous messages to check for function_call references
:param files_api: Files API for resolving file_id to raw file content (optional, required for file/image content)
""" """
messages: list[OpenAIMessageParam] = [] messages: list[OpenAIMessageParam] = []
if isinstance(input, list): if isinstance(input, list):
@ -169,6 +282,12 @@ async def convert_response_input_to_chat_messages(
elif isinstance(input_item, OpenAIResponseOutputMessageMCPListTools): elif isinstance(input_item, OpenAIResponseOutputMessageMCPListTools):
# the tool list will be handled separately # the tool list will be handled separately
pass pass
elif isinstance(
input_item,
OpenAIResponseOutputMessageWebSearchToolCall | OpenAIResponseOutputMessageFileSearchToolCall,
):
# these tool calls are tracked internally but not converted to chat messages
pass
elif isinstance(input_item, OpenAIResponseMCPApprovalRequest) or isinstance( elif isinstance(input_item, OpenAIResponseMCPApprovalRequest) or isinstance(
input_item, OpenAIResponseMCPApprovalResponse input_item, OpenAIResponseMCPApprovalResponse
): ):
@ -176,7 +295,7 @@ async def convert_response_input_to_chat_messages(
pass pass
elif isinstance(input_item, OpenAIResponseMessage): elif isinstance(input_item, OpenAIResponseMessage):
# Narrow type to OpenAIResponseMessage which has content and role attributes # Narrow type to OpenAIResponseMessage which has content and role attributes
content = await convert_response_content_to_chat_content(input_item.content) content = await convert_response_content_to_chat_content(input_item.content, files_api)
message_type = await get_message_type_by_role(input_item.role) message_type = await get_message_type_by_role(input_item.role)
if message_type is None: if message_type is None:
raise ValueError( raise ValueError(
@ -320,11 +439,15 @@ def is_function_tool_call(
return False return False
async def run_guardrails(safety_api: Safety, messages: str, guardrail_ids: list[str]) -> str | None: async def run_guardrails(safety_api: Safety | None, messages: str, guardrail_ids: list[str]) -> str | None:
"""Run guardrails against messages and return violation message if blocked.""" """Run guardrails against messages and return violation message if blocked."""
if not messages: if not messages:
return None return None
# If safety API is not available, skip guardrails
if safety_api is None:
return None
# Look up shields to get their provider_resource_id (actual model ID) # Look up shields to get their provider_resource_id (actual model ID)
model_ids = [] model_ids = []
# TODO: list_shields not in Safety interface but available at runtime via API routing # TODO: list_shields not in Safety interface but available at runtime via API routing
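The helpers added above (extract_bytes_from_file, generate_base64_ascii_text_from_bytes, construct_data_url) turn a file_id into an inline base64 data URL so images and files can be forwarded to chat completions. A minimal sketch of the encoding path with inline bytes instead of the Files API (filename and content are examples):

import base64
import mimetypes

def to_data_url(raw_bytes: bytes, filename: str | None) -> str:
    # Mirrors generate_base64_ascii_text_from_bytes + construct_data_url above.
    mime_type = mimetypes.guess_type(filename)[0] if filename else None
    ascii_text = base64.b64encode(raw_bytes).decode("utf-8")
    return f"data:{mime_type or 'application/octet-stream'};base64,{ascii_text}"

print(to_data_url(b"hello world", "notes.txt"))
# -> data:text/plain;base64,aGVsbG8gd29ybGQ=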
View file
@ -30,11 +30,15 @@ def available_providers() -> list[ProviderSpec]:
config_class="llama_stack.providers.inline.agents.meta_reference.MetaReferenceAgentsImplConfig", config_class="llama_stack.providers.inline.agents.meta_reference.MetaReferenceAgentsImplConfig",
api_dependencies=[ api_dependencies=[
Api.inference, Api.inference,
Api.safety,
Api.vector_io, Api.vector_io,
Api.tool_runtime, Api.tool_runtime,
Api.tool_groups, Api.tool_groups,
Api.conversations, Api.conversations,
Api.prompts,
Api.files,
],
optional_api_dependencies=[
Api.safety,
], ],
description="Meta's reference implementation of an agent system that can use tools, access vector databases, and perform complex reasoning tasks.", description="Meta's reference implementation of an agent system that can use tools, access vector databases, and perform complex reasoning tasks.",
), ),
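Moving Api.safety from api_dependencies to optional_api_dependencies means the agents provider is constructed even when no Safety provider is configured; the wiring code then uses deps.get(...) and tolerates None. A tiny illustrative sketch of that lookup difference (the enum and dict below are stand-ins, not stack code):

from enum import Enum

class Api(Enum):                       # stand-in for the real Api enum
    inference = "inference"
    safety = "safety"

deps = {Api.inference: object()}       # run config without a Safety provider

safety_api = deps.get(Api.safety)      # optional dependency -> None, guardrails unavailable
inference_api = deps[Api.inference]    # required dependency -> KeyError if missing
print(safety_api is None)              # -> True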
View file
@ -4,8 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from urllib.parse import urljoin
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
from .config import AzureConfig from .config import AzureConfig
@ -22,4 +20,4 @@ class AzureInferenceAdapter(OpenAIMixin):
Returns the Azure API base URL from the configuration. Returns the Azure API base URL from the configuration.
""" """
return urljoin(str(self.config.api_base), "/openai/v1") return str(self.config.base_url)
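Previously the Azure adapter appended /openai/v1 to api_base with urljoin; after this change base_url is expected to already contain the full path and is returned unchanged. A quick sketch contrasting the two (the resource name is an example):

from urllib.parse import urljoin

# Old behaviour: a bare resource URL had "/openai/v1" joined onto it.
api_base = "https://your-resource-name.openai.azure.com"   # example value
print(urljoin(api_base, "/openai/v1"))
# -> https://your-resource-name.openai.azure.com/openai/v1
# New behaviour: AZURE_API_BASE should already be the full .../openai/v1 URL,
# and get_base_url() simply returns str(self.config.base_url).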
View file
@ -32,8 +32,9 @@ class AzureProviderDataValidator(BaseModel):
@json_schema_type @json_schema_type
class AzureConfig(RemoteInferenceProviderConfig): class AzureConfig(RemoteInferenceProviderConfig):
api_base: HttpUrl = Field( base_url: HttpUrl | None = Field(
description="Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com)", default=None,
description="Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com/openai/v1)",
) )
api_version: str | None = Field( api_version: str | None = Field(
default_factory=lambda: os.getenv("AZURE_API_VERSION"), default_factory=lambda: os.getenv("AZURE_API_VERSION"),
@ -48,14 +49,14 @@ class AzureConfig(RemoteInferenceProviderConfig):
def sample_run_config( def sample_run_config(
cls, cls,
api_key: str = "${env.AZURE_API_KEY:=}", api_key: str = "${env.AZURE_API_KEY:=}",
api_base: str = "${env.AZURE_API_BASE:=}", base_url: str = "${env.AZURE_API_BASE:=}",
api_version: str = "${env.AZURE_API_VERSION:=}", api_version: str = "${env.AZURE_API_VERSION:=}",
api_type: str = "${env.AZURE_API_TYPE:=}", api_type: str = "${env.AZURE_API_TYPE:=}",
**kwargs, **kwargs,
) -> dict[str, Any]: ) -> dict[str, Any]:
return { return {
"api_key": api_key, "api_key": api_key,
"api_base": api_base, "base_url": base_url,
"api_version": api_version, "api_version": api_version,
"api_type": api_type, "api_type": api_type,
} }
View file
@ -4,8 +4,6 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from urllib.parse import urljoin
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
from llama_stack_api import ( from llama_stack_api import (
OpenAIEmbeddingsRequestWithExtraBody, OpenAIEmbeddingsRequestWithExtraBody,
@ -21,7 +19,7 @@ class CerebrasInferenceAdapter(OpenAIMixin):
provider_data_api_key_field: str = "cerebras_api_key" provider_data_api_key_field: str = "cerebras_api_key"
def get_base_url(self) -> str: def get_base_url(self) -> str:
return urljoin(self.config.base_url, "v1") return str(self.config.base_url)
async def openai_embeddings( async def openai_embeddings(
self, self,
View file
@ -7,12 +7,12 @@
import os import os
from typing import Any from typing import Any
from pydantic import BaseModel, Field from pydantic import BaseModel, Field, HttpUrl
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack_api import json_schema_type from llama_stack_api import json_schema_type
DEFAULT_BASE_URL = "https://api.cerebras.ai" DEFAULT_BASE_URL = "https://api.cerebras.ai/v1"
class CerebrasProviderDataValidator(BaseModel): class CerebrasProviderDataValidator(BaseModel):
@ -24,8 +24,8 @@ class CerebrasProviderDataValidator(BaseModel):
@json_schema_type @json_schema_type
class CerebrasImplConfig(RemoteInferenceProviderConfig): class CerebrasImplConfig(RemoteInferenceProviderConfig):
base_url: str = Field( base_url: HttpUrl | None = Field(
default=os.environ.get("CEREBRAS_BASE_URL", DEFAULT_BASE_URL), default=HttpUrl(os.environ.get("CEREBRAS_BASE_URL", DEFAULT_BASE_URL)),
description="Base URL for the Cerebras API", description="Base URL for the Cerebras API",
) )
View file
@ -6,7 +6,7 @@
from typing import Any from typing import Any
from pydantic import BaseModel, Field, SecretStr from pydantic import BaseModel, Field, HttpUrl, SecretStr
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack_api import json_schema_type from llama_stack_api import json_schema_type
@ -21,9 +21,9 @@ class DatabricksProviderDataValidator(BaseModel):
@json_schema_type @json_schema_type
class DatabricksImplConfig(RemoteInferenceProviderConfig): class DatabricksImplConfig(RemoteInferenceProviderConfig):
url: str | None = Field( base_url: HttpUrl | None = Field(
default=None, default=None,
description="The URL for the Databricks model serving endpoint", description="The URL for the Databricks model serving endpoint (should include /serving-endpoints path)",
) )
auth_credential: SecretStr | None = Field( auth_credential: SecretStr | None = Field(
default=None, default=None,
@ -34,11 +34,11 @@ class DatabricksImplConfig(RemoteInferenceProviderConfig):
@classmethod @classmethod
def sample_run_config( def sample_run_config(
cls, cls,
url: str = "${env.DATABRICKS_HOST:=}", base_url: str = "${env.DATABRICKS_HOST:=}",
api_token: str = "${env.DATABRICKS_TOKEN:=}", api_token: str = "${env.DATABRICKS_TOKEN:=}",
**kwargs: Any, **kwargs: Any,
) -> dict[str, Any]: ) -> dict[str, Any]:
return { return {
"url": url, "base_url": base_url,
"api_token": api_token, "api_token": api_token,
} }
View file
@ -29,15 +29,21 @@ class DatabricksInferenceAdapter(OpenAIMixin):
} }
def get_base_url(self) -> str: def get_base_url(self) -> str:
return f"{self.config.url}/serving-endpoints" return str(self.config.base_url)
async def list_provider_model_ids(self) -> Iterable[str]: async def list_provider_model_ids(self) -> Iterable[str]:
# Filter out None values from endpoint names # Filter out None values from endpoint names
api_token = self._get_api_key_from_config_or_provider_data() api_token = self._get_api_key_from_config_or_provider_data()
# WorkspaceClient expects base host without /serving-endpoints suffix
base_url_str = str(self.config.base_url)
if base_url_str.endswith("/serving-endpoints"):
host = base_url_str[:-18] # Remove '/serving-endpoints'
else:
host = base_url_str
return [ return [
endpoint.name # type: ignore[misc] endpoint.name # type: ignore[misc]
for endpoint in WorkspaceClient( for endpoint in WorkspaceClient(
host=self.config.url, token=api_token host=host, token=api_token
).serving_endpoints.list() # TODO: this is not async ).serving_endpoints.list() # TODO: this is not async
] ]
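list_provider_model_ids above trims a trailing /serving-endpoints from base_url (by slicing off its 18 characters) because WorkspaceClient expects the bare workspace host. An equivalent sketch using str.removesuffix, with an example host:

# Equivalent, slightly more explicit form of the suffix stripping above.
base_url_str = "https://dbc-1234.cloud.databricks.com/serving-endpoints"   # example value
host = base_url_str.removesuffix("/serving-endpoints")
print(host)
# -> https://dbc-1234.cloud.databricks.com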
View file
@ -6,7 +6,7 @@
from typing import Any from typing import Any
from pydantic import Field from pydantic import Field, HttpUrl
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack_api import json_schema_type from llama_stack_api import json_schema_type
@ -14,14 +14,14 @@ from llama_stack_api import json_schema_type
@json_schema_type @json_schema_type
class FireworksImplConfig(RemoteInferenceProviderConfig): class FireworksImplConfig(RemoteInferenceProviderConfig):
url: str = Field( base_url: HttpUrl | None = Field(
default="https://api.fireworks.ai/inference/v1", default=HttpUrl("https://api.fireworks.ai/inference/v1"),
description="The URL for the Fireworks server", description="The URL for the Fireworks server",
) )
@classmethod @classmethod
def sample_run_config(cls, api_key: str = "${env.FIREWORKS_API_KEY:=}", **kwargs) -> dict[str, Any]: def sample_run_config(cls, api_key: str = "${env.FIREWORKS_API_KEY:=}", **kwargs) -> dict[str, Any]:
return { return {
"url": "https://api.fireworks.ai/inference/v1", "base_url": "https://api.fireworks.ai/inference/v1",
"api_key": api_key, "api_key": api_key,
} }
View file
@ -24,4 +24,4 @@ class FireworksInferenceAdapter(OpenAIMixin):
provider_data_api_key_field: str = "fireworks_api_key" provider_data_api_key_field: str = "fireworks_api_key"
def get_base_url(self) -> str: def get_base_url(self) -> str:
return "https://api.fireworks.ai/inference/v1" return str(self.config.base_url)
View file
@ -6,7 +6,7 @@
from typing import Any from typing import Any
from pydantic import BaseModel, Field from pydantic import BaseModel, Field, HttpUrl
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack_api import json_schema_type from llama_stack_api import json_schema_type
@ -21,14 +21,14 @@ class GroqProviderDataValidator(BaseModel):
@json_schema_type @json_schema_type
class GroqConfig(RemoteInferenceProviderConfig): class GroqConfig(RemoteInferenceProviderConfig):
url: str = Field( base_url: HttpUrl | None = Field(
default="https://api.groq.com", default=HttpUrl("https://api.groq.com/openai/v1"),
description="The URL for the Groq AI server", description="The URL for the Groq AI server",
) )
@classmethod @classmethod
def sample_run_config(cls, api_key: str = "${env.GROQ_API_KEY:=}", **kwargs) -> dict[str, Any]: def sample_run_config(cls, api_key: str = "${env.GROQ_API_KEY:=}", **kwargs) -> dict[str, Any]:
return { return {
"url": "https://api.groq.com", "base_url": "https://api.groq.com/openai/v1",
"api_key": api_key, "api_key": api_key,
} }
View file
@ -15,4 +15,4 @@ class GroqInferenceAdapter(OpenAIMixin):
provider_data_api_key_field: str = "groq_api_key" provider_data_api_key_field: str = "groq_api_key"
def get_base_url(self) -> str: def get_base_url(self) -> str:
return f"{self.config.url}/openai/v1" return str(self.config.base_url)
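Across these provider configs (Fireworks and Groq above, and the ones that follow) the change follows one pattern: the config stores the full OpenAI-compatible `base_url` as a pydantic `HttpUrl`, and the adapter stringifies it instead of appending path segments itself. A rough, self-contained sketch (the class name is illustrative, not a real provider config):

```python
from pydantic import BaseModel, Field, HttpUrl


class ExampleRemoteConfig(BaseModel):
    base_url: HttpUrl | None = Field(
        default=HttpUrl("https://api.groq.com/openai/v1"),
        description="The URL for the remote OpenAI-compatible server",
    )


def get_base_url(config: ExampleRemoteConfig) -> str:
    # No more f"{url}/openai/v1" in the adapter; the path now lives in the config.
    return str(config.base_url)


print(get_base_url(ExampleRemoteConfig()))  # -> https://api.groq.com/openai/v1
```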


@ -6,7 +6,7 @@
from typing import Any from typing import Any
from pydantic import BaseModel, Field from pydantic import BaseModel, Field, HttpUrl
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack_api import json_schema_type from llama_stack_api import json_schema_type
@ -21,14 +21,14 @@ class LlamaProviderDataValidator(BaseModel):
@json_schema_type @json_schema_type
class LlamaCompatConfig(RemoteInferenceProviderConfig): class LlamaCompatConfig(RemoteInferenceProviderConfig):
openai_compat_api_base: str = Field( base_url: HttpUrl | None = Field(
default="https://api.llama.com/compat/v1/", default=HttpUrl("https://api.llama.com/compat/v1/"),
description="The URL for the Llama API server", description="The URL for the Llama API server",
) )
@classmethod @classmethod
def sample_run_config(cls, api_key: str = "${env.LLAMA_API_KEY}", **kwargs) -> dict[str, Any]: def sample_run_config(cls, api_key: str = "${env.LLAMA_API_KEY}", **kwargs) -> dict[str, Any]:
return { return {
"openai_compat_api_base": "https://api.llama.com/compat/v1/", "base_url": "https://api.llama.com/compat/v1/",
"api_key": api_key, "api_key": api_key,
} }


@ -31,7 +31,7 @@ class LlamaCompatInferenceAdapter(OpenAIMixin):
:return: The Llama API base URL :return: The Llama API base URL
""" """
return self.config.openai_compat_api_base return str(self.config.base_url)
async def openai_completion( async def openai_completion(
self, self,


@ -7,7 +7,7 @@
import os import os
from typing import Any from typing import Any
from pydantic import BaseModel, Field from pydantic import BaseModel, Field, HttpUrl
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack_api import json_schema_type from llama_stack_api import json_schema_type
@ -44,18 +44,14 @@ class NVIDIAConfig(RemoteInferenceProviderConfig):
URL of your running NVIDIA NIM and do not need to set the api_key. URL of your running NVIDIA NIM and do not need to set the api_key.
""" """
url: str = Field( base_url: HttpUrl | None = Field(
default_factory=lambda: os.getenv("NVIDIA_BASE_URL", "https://integrate.api.nvidia.com"), default_factory=lambda: os.getenv("NVIDIA_BASE_URL", "https://integrate.api.nvidia.com/v1"),
description="A base url for accessing the NVIDIA NIM", description="A base url for accessing the NVIDIA NIM",
) )
timeout: int = Field( timeout: int = Field(
default=60, default=60,
description="Timeout for the HTTP requests", description="Timeout for the HTTP requests",
) )
append_api_version: bool = Field(
default_factory=lambda: os.getenv("NVIDIA_APPEND_API_VERSION", "True").lower() != "false",
description="When set to false, the API version will not be appended to the base_url. By default, it is true.",
)
rerank_model_to_url: dict[str, str] = Field( rerank_model_to_url: dict[str, str] = Field(
default_factory=lambda: { default_factory=lambda: {
"nv-rerank-qa-mistral-4b:1": "https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking", "nv-rerank-qa-mistral-4b:1": "https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking",
@ -68,13 +64,11 @@ class NVIDIAConfig(RemoteInferenceProviderConfig):
@classmethod @classmethod
def sample_run_config( def sample_run_config(
cls, cls,
url: str = "${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}", base_url: HttpUrl | None = "${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}",
api_key: str = "${env.NVIDIA_API_KEY:=}", api_key: str = "${env.NVIDIA_API_KEY:=}",
append_api_version: bool = "${env.NVIDIA_APPEND_API_VERSION:=True}",
**kwargs, **kwargs,
) -> dict[str, Any]: ) -> dict[str, Any]:
return { return {
"url": url, "base_url": base_url,
"api_key": api_key, "api_key": api_key,
"append_api_version": append_api_version,
} }


@ -44,7 +44,7 @@ class NVIDIAInferenceAdapter(OpenAIMixin):
} }
async def initialize(self) -> None: async def initialize(self) -> None:
logger.info(f"Initializing NVIDIAInferenceAdapter({self.config.url})...") logger.info(f"Initializing NVIDIAInferenceAdapter({self.config.base_url})...")
if _is_nvidia_hosted(self.config): if _is_nvidia_hosted(self.config):
if not self.config.auth_credential: if not self.config.auth_credential:
@ -72,7 +72,7 @@ class NVIDIAInferenceAdapter(OpenAIMixin):
:return: The NVIDIA API base URL :return: The NVIDIA API base URL
""" """
return f"{self.config.url}/v1" if self.config.append_api_version else self.config.url return str(self.config.base_url)
async def list_provider_model_ids(self) -> Iterable[str]: async def list_provider_model_ids(self) -> Iterable[str]:
""" """


@ -8,4 +8,4 @@ from . import NVIDIAConfig
def _is_nvidia_hosted(config: NVIDIAConfig) -> bool: def _is_nvidia_hosted(config: NVIDIAConfig) -> bool:
return "integrate.api.nvidia.com" in config.url return "integrate.api.nvidia.com" in str(config.base_url)


@ -6,20 +6,22 @@
from typing import Any from typing import Any
from pydantic import Field, SecretStr from pydantic import Field, HttpUrl, SecretStr
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
DEFAULT_OLLAMA_URL = "http://localhost:11434" DEFAULT_OLLAMA_URL = "http://localhost:11434/v1"
class OllamaImplConfig(RemoteInferenceProviderConfig): class OllamaImplConfig(RemoteInferenceProviderConfig):
auth_credential: SecretStr | None = Field(default=None, exclude=True) auth_credential: SecretStr | None = Field(default=None, exclude=True)
url: str = DEFAULT_OLLAMA_URL base_url: HttpUrl | None = Field(default=HttpUrl(DEFAULT_OLLAMA_URL))
@classmethod @classmethod
def sample_run_config(cls, url: str = "${env.OLLAMA_URL:=http://localhost:11434}", **kwargs) -> dict[str, Any]: def sample_run_config(
cls, base_url: str = "${env.OLLAMA_URL:=http://localhost:11434/v1}", **kwargs
) -> dict[str, Any]:
return { return {
"url": url, "base_url": base_url,
} }


@ -55,17 +55,23 @@ class OllamaInferenceAdapter(OpenAIMixin):
# ollama client attaches itself to the current event loop (sadly?) # ollama client attaches itself to the current event loop (sadly?)
loop = asyncio.get_running_loop() loop = asyncio.get_running_loop()
if loop not in self._clients: if loop not in self._clients:
self._clients[loop] = AsyncOllamaClient(host=self.config.url) # Ollama client expects base URL without /v1 suffix
base_url_str = str(self.config.base_url)
if base_url_str.endswith("/v1"):
host = base_url_str[:-3]
else:
host = base_url_str
self._clients[loop] = AsyncOllamaClient(host=host)
return self._clients[loop] return self._clients[loop]
def get_api_key(self): def get_api_key(self):
return "NO KEY REQUIRED" return "NO KEY REQUIRED"
def get_base_url(self): def get_base_url(self):
return self.config.url.rstrip("/") + "/v1" return str(self.config.base_url)
async def initialize(self) -> None: async def initialize(self) -> None:
logger.info(f"checking connectivity to Ollama at `{self.config.url}`...") logger.info(f"checking connectivity to Ollama at `{self.config.base_url}`...")
r = await self.health() r = await self.health()
if r["status"] == HealthStatus.ERROR: if r["status"] == HealthStatus.ERROR:
logger.warning( logger.warning(
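Ollama now keeps two views of the same setting: the OpenAI-compatible client uses `base_url` verbatim (with `/v1`), while the native `AsyncOllamaClient` wants the bare host. A small sketch of that split, assuming the default URL:

```python
def ollama_urls(base_url: str = "http://localhost:11434/v1") -> tuple[str, str]:
    openai_base = base_url  # passed to the OpenAI-compatible client as-is
    host = base_url[:-3] if base_url.endswith("/v1") else base_url  # native client host
    return openai_base, host


print(ollama_urls())  # ('http://localhost:11434/v1', 'http://localhost:11434')
```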


@ -6,7 +6,7 @@
from typing import Any from typing import Any
from pydantic import BaseModel, Field from pydantic import BaseModel, Field, HttpUrl
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack_api import json_schema_type from llama_stack_api import json_schema_type
@ -21,8 +21,8 @@ class OpenAIProviderDataValidator(BaseModel):
@json_schema_type @json_schema_type
class OpenAIConfig(RemoteInferenceProviderConfig): class OpenAIConfig(RemoteInferenceProviderConfig):
base_url: str = Field( base_url: HttpUrl | None = Field(
default="https://api.openai.com/v1", default=HttpUrl("https://api.openai.com/v1"),
description="Base URL for OpenAI API", description="Base URL for OpenAI API",
) )


@ -35,4 +35,4 @@ class OpenAIInferenceAdapter(OpenAIMixin):
Returns the OpenAI API base URL from the configuration. Returns the OpenAI API base URL from the configuration.
""" """
return self.config.base_url return str(self.config.base_url)


@ -6,7 +6,7 @@
from typing import Any from typing import Any
from pydantic import Field from pydantic import Field, HttpUrl
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack_api import json_schema_type from llama_stack_api import json_schema_type
@ -14,16 +14,16 @@ from llama_stack_api import json_schema_type
@json_schema_type @json_schema_type
class PassthroughImplConfig(RemoteInferenceProviderConfig): class PassthroughImplConfig(RemoteInferenceProviderConfig):
url: str = Field( base_url: HttpUrl | None = Field(
default=None, default=None,
description="The URL for the passthrough endpoint", description="The URL for the passthrough endpoint",
) )
@classmethod @classmethod
def sample_run_config( def sample_run_config(
cls, url: str = "${env.PASSTHROUGH_URL}", api_key: str = "${env.PASSTHROUGH_API_KEY}", **kwargs cls, base_url: HttpUrl | None = "${env.PASSTHROUGH_URL}", api_key: str = "${env.PASSTHROUGH_API_KEY}", **kwargs
) -> dict[str, Any]: ) -> dict[str, Any]:
return { return {
"url": url, "base_url": base_url,
"api_key": api_key, "api_key": api_key,
} }


@ -82,8 +82,8 @@ class PassthroughInferenceAdapter(NeedsRequestProviderData, Inference):
def _get_passthrough_url(self) -> str: def _get_passthrough_url(self) -> str:
"""Get the passthrough URL from config or provider data.""" """Get the passthrough URL from config or provider data."""
if self.config.url is not None: if self.config.base_url is not None:
return self.config.url return str(self.config.base_url)
provider_data = self.get_request_provider_data() provider_data = self.get_request_provider_data()
if provider_data is None: if provider_data is None:


@ -6,7 +6,7 @@
from typing import Any from typing import Any
from pydantic import BaseModel, Field, SecretStr from pydantic import BaseModel, Field, HttpUrl, SecretStr
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack_api import json_schema_type from llama_stack_api import json_schema_type
@ -21,7 +21,7 @@ class RunpodProviderDataValidator(BaseModel):
@json_schema_type @json_schema_type
class RunpodImplConfig(RemoteInferenceProviderConfig): class RunpodImplConfig(RemoteInferenceProviderConfig):
url: str | None = Field( base_url: HttpUrl | None = Field(
default=None, default=None,
description="The URL for the Runpod model serving endpoint", description="The URL for the Runpod model serving endpoint",
) )
@ -34,6 +34,6 @@ class RunpodImplConfig(RemoteInferenceProviderConfig):
@classmethod @classmethod
def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]: def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]:
return { return {
"url": "${env.RUNPOD_URL:=}", "base_url": "${env.RUNPOD_URL:=}",
"api_token": "${env.RUNPOD_API_TOKEN}", "api_token": "${env.RUNPOD_API_TOKEN}",
} }


@ -28,7 +28,7 @@ class RunpodInferenceAdapter(OpenAIMixin):
def get_base_url(self) -> str: def get_base_url(self) -> str:
"""Get base URL for OpenAI client.""" """Get base URL for OpenAI client."""
return self.config.url return str(self.config.base_url)
async def openai_chat_completion( async def openai_chat_completion(
self, self,


@ -6,7 +6,7 @@
from typing import Any from typing import Any
from pydantic import BaseModel, Field from pydantic import BaseModel, Field, HttpUrl
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack_api import json_schema_type from llama_stack_api import json_schema_type
@ -21,14 +21,14 @@ class SambaNovaProviderDataValidator(BaseModel):
@json_schema_type @json_schema_type
class SambaNovaImplConfig(RemoteInferenceProviderConfig): class SambaNovaImplConfig(RemoteInferenceProviderConfig):
url: str = Field( base_url: HttpUrl | None = Field(
default="https://api.sambanova.ai/v1", default=HttpUrl("https://api.sambanova.ai/v1"),
description="The URL for the SambaNova AI server", description="The URL for the SambaNova AI server",
) )
@classmethod @classmethod
def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY:=}", **kwargs) -> dict[str, Any]: def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY:=}", **kwargs) -> dict[str, Any]:
return { return {
"url": "https://api.sambanova.ai/v1", "base_url": "https://api.sambanova.ai/v1",
"api_key": api_key, "api_key": api_key,
} }


@ -25,4 +25,4 @@ class SambaNovaInferenceAdapter(OpenAIMixin):
:return: The SambaNova base URL :return: The SambaNova base URL
""" """
return self.config.url return str(self.config.base_url)


@ -5,7 +5,7 @@
# the root directory of this source tree. # the root directory of this source tree.
from pydantic import BaseModel, Field, SecretStr from pydantic import BaseModel, Field, HttpUrl, SecretStr
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack_api import json_schema_type from llama_stack_api import json_schema_type
@ -15,18 +15,19 @@ from llama_stack_api import json_schema_type
class TGIImplConfig(RemoteInferenceProviderConfig): class TGIImplConfig(RemoteInferenceProviderConfig):
auth_credential: SecretStr | None = Field(default=None, exclude=True) auth_credential: SecretStr | None = Field(default=None, exclude=True)
url: str = Field( base_url: HttpUrl | None = Field(
description="The URL for the TGI serving endpoint", default=None,
description="The URL for the TGI serving endpoint (should include /v1 path)",
) )
@classmethod @classmethod
def sample_run_config( def sample_run_config(
cls, cls,
url: str = "${env.TGI_URL:=}", base_url: str = "${env.TGI_URL:=}",
**kwargs, **kwargs,
): ):
return { return {
"url": url, "base_url": base_url,
} }


@ -8,7 +8,7 @@
from collections.abc import Iterable from collections.abc import Iterable
from huggingface_hub import AsyncInferenceClient, HfApi from huggingface_hub import AsyncInferenceClient, HfApi
from pydantic import SecretStr from pydantic import HttpUrl, SecretStr
from llama_stack.log import get_logger from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
@ -23,7 +23,7 @@ log = get_logger(name=__name__, category="inference::tgi")
class _HfAdapter(OpenAIMixin): class _HfAdapter(OpenAIMixin):
url: str base_url: HttpUrl
api_key: SecretStr api_key: SecretStr
hf_client: AsyncInferenceClient hf_client: AsyncInferenceClient
@ -36,7 +36,7 @@ class _HfAdapter(OpenAIMixin):
return "NO KEY REQUIRED" return "NO KEY REQUIRED"
def get_base_url(self): def get_base_url(self):
return self.url return self.base_url
async def list_provider_model_ids(self) -> Iterable[str]: async def list_provider_model_ids(self) -> Iterable[str]:
return [self.model_id] return [self.model_id]
@ -50,14 +50,20 @@ class _HfAdapter(OpenAIMixin):
class TGIAdapter(_HfAdapter): class TGIAdapter(_HfAdapter):
async def initialize(self, config: TGIImplConfig) -> None: async def initialize(self, config: TGIImplConfig) -> None:
if not config.url: if not config.base_url:
raise ValueError("You must provide a URL in run.yaml (or via the TGI_URL environment variable) to use TGI.") raise ValueError("You must provide a URL in run.yaml (or via the TGI_URL environment variable) to use TGI.")
log.info(f"Initializing TGI client with url={config.url}") log.info(f"Initializing TGI client with url={config.base_url}")
self.hf_client = AsyncInferenceClient(model=config.url, provider="hf-inference") # Extract base URL without /v1 for HF client initialization
base_url_str = str(config.base_url).rstrip("/")
if base_url_str.endswith("/v1"):
base_url_for_client = base_url_str[:-3]
else:
base_url_for_client = base_url_str
self.hf_client = AsyncInferenceClient(model=base_url_for_client, provider="hf-inference")
endpoint_info = await self.hf_client.get_endpoint_info() endpoint_info = await self.hf_client.get_endpoint_info()
self.max_tokens = endpoint_info["max_total_tokens"] self.max_tokens = endpoint_info["max_total_tokens"]
self.model_id = endpoint_info["model_id"] self.model_id = endpoint_info["model_id"]
self.url = f"{config.url.rstrip('/')}/v1" self.base_url = config.base_url
self.api_key = SecretStr("NO_KEY") self.api_key = SecretStr("NO_KEY")


@ -6,7 +6,7 @@
from typing import Any from typing import Any
from pydantic import Field from pydantic import Field, HttpUrl
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack_api import json_schema_type from llama_stack_api import json_schema_type
@ -14,14 +14,14 @@ from llama_stack_api import json_schema_type
@json_schema_type @json_schema_type
class TogetherImplConfig(RemoteInferenceProviderConfig): class TogetherImplConfig(RemoteInferenceProviderConfig):
url: str = Field( base_url: HttpUrl | None = Field(
default="https://api.together.xyz/v1", default=HttpUrl("https://api.together.xyz/v1"),
description="The URL for the Together AI server", description="The URL for the Together AI server",
) )
@classmethod @classmethod
def sample_run_config(cls, **kwargs) -> dict[str, Any]: def sample_run_config(cls, **kwargs) -> dict[str, Any]:
return { return {
"url": "https://api.together.xyz/v1", "base_url": "https://api.together.xyz/v1",
"api_key": "${env.TOGETHER_API_KEY:=}", "api_key": "${env.TOGETHER_API_KEY:=}",
} }


@ -9,7 +9,6 @@ from collections.abc import Iterable
from typing import Any, cast from typing import Any, cast
from together import AsyncTogether # type: ignore[import-untyped] from together import AsyncTogether # type: ignore[import-untyped]
from together.constants import BASE_URL # type: ignore[import-untyped]
from llama_stack.core.request_headers import NeedsRequestProviderData from llama_stack.core.request_headers import NeedsRequestProviderData
from llama_stack.log import get_logger from llama_stack.log import get_logger
@ -42,7 +41,7 @@ class TogetherInferenceAdapter(OpenAIMixin, NeedsRequestProviderData):
provider_data_api_key_field: str = "together_api_key" provider_data_api_key_field: str = "together_api_key"
def get_base_url(self): def get_base_url(self):
return BASE_URL return str(self.config.base_url)
def _get_client(self) -> AsyncTogether: def _get_client(self) -> AsyncTogether:
together_api_key = None together_api_key = None


@ -6,7 +6,7 @@
from pathlib import Path from pathlib import Path
from pydantic import Field, SecretStr, field_validator from pydantic import Field, HttpUrl, SecretStr, field_validator
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack_api import json_schema_type from llama_stack_api import json_schema_type
@ -14,7 +14,7 @@ from llama_stack_api import json_schema_type
@json_schema_type @json_schema_type
class VLLMInferenceAdapterConfig(RemoteInferenceProviderConfig): class VLLMInferenceAdapterConfig(RemoteInferenceProviderConfig):
url: str | None = Field( base_url: HttpUrl | None = Field(
default=None, default=None,
description="The URL for the vLLM model serving endpoint", description="The URL for the vLLM model serving endpoint",
) )
@ -48,11 +48,11 @@ class VLLMInferenceAdapterConfig(RemoteInferenceProviderConfig):
@classmethod @classmethod
def sample_run_config( def sample_run_config(
cls, cls,
url: str = "${env.VLLM_URL:=}", base_url: str = "${env.VLLM_URL:=}",
**kwargs, **kwargs,
): ):
return { return {
"url": url, "base_url": base_url,
"max_tokens": "${env.VLLM_MAX_TOKENS:=4096}", "max_tokens": "${env.VLLM_MAX_TOKENS:=4096}",
"api_token": "${env.VLLM_API_TOKEN:=fake}", "api_token": "${env.VLLM_API_TOKEN:=fake}",
"tls_verify": "${env.VLLM_TLS_VERIFY:=true}", "tls_verify": "${env.VLLM_TLS_VERIFY:=true}",


@ -39,12 +39,12 @@ class VLLMInferenceAdapter(OpenAIMixin):
def get_base_url(self) -> str: def get_base_url(self) -> str:
"""Get the base URL from config.""" """Get the base URL from config."""
if not self.config.url: if not self.config.base_url:
raise ValueError("No base URL configured") raise ValueError("No base URL configured")
return self.config.url return str(self.config.base_url)
async def initialize(self) -> None: async def initialize(self) -> None:
if not self.config.url: if not self.config.base_url:
raise ValueError( raise ValueError(
"You must provide a URL in run.yaml (or via the VLLM_URL environment variable) to use vLLM." "You must provide a URL in run.yaml (or via the VLLM_URL environment variable) to use vLLM."
) )


@ -7,7 +7,7 @@
import os import os
from typing import Any from typing import Any
from pydantic import BaseModel, Field from pydantic import BaseModel, Field, HttpUrl
from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack_api import json_schema_type from llama_stack_api import json_schema_type
@ -23,7 +23,7 @@ class WatsonXProviderDataValidator(BaseModel):
@json_schema_type @json_schema_type
class WatsonXConfig(RemoteInferenceProviderConfig): class WatsonXConfig(RemoteInferenceProviderConfig):
url: str = Field( base_url: HttpUrl | None = Field(
default_factory=lambda: os.getenv("WATSONX_BASE_URL", "https://us-south.ml.cloud.ibm.com"), default_factory=lambda: os.getenv("WATSONX_BASE_URL", "https://us-south.ml.cloud.ibm.com"),
description="A base url for accessing the watsonx.ai", description="A base url for accessing the watsonx.ai",
) )
@ -39,7 +39,7 @@ class WatsonXConfig(RemoteInferenceProviderConfig):
@classmethod @classmethod
def sample_run_config(cls, **kwargs) -> dict[str, Any]: def sample_run_config(cls, **kwargs) -> dict[str, Any]:
return { return {
"url": "${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}", "base_url": "${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}",
"api_key": "${env.WATSONX_API_KEY:=}", "api_key": "${env.WATSONX_API_KEY:=}",
"project_id": "${env.WATSONX_PROJECT_ID:=}", "project_id": "${env.WATSONX_PROJECT_ID:=}",
} }


@ -255,7 +255,7 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
) )
def get_base_url(self) -> str: def get_base_url(self) -> str:
return self.config.url return str(self.config.base_url)
# Copied from OpenAIMixin # Copied from OpenAIMixin
async def check_model_availability(self, model: str) -> bool: async def check_model_availability(self, model: str) -> bool:
@ -316,7 +316,7 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
""" """
Retrieves foundation model specifications from the watsonx.ai API. Retrieves foundation model specifications from the watsonx.ai API.
""" """
url = f"{self.config.url}/ml/v1/foundation_model_specs?version=2023-10-25" url = f"{str(self.config.base_url)}/ml/v1/foundation_model_specs?version=2023-10-25"
headers = { headers = {
# Note that there is no authorization header. Listing models does not require authentication. # Note that there is no authorization header. Listing models does not require authentication.
"Content-Type": "application/json", "Content-Type": "application/json",


@ -3,23 +3,10 @@
# #
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from collections.abc import Iterable
from typing import ( from typing import (
Any, Any,
) )
from openai.types.chat import (
ChatCompletionContentPartParam as OpenAIChatCompletionContentPartParam,
)
try:
from openai.types.chat import (
ChatCompletionMessageFunctionToolCall as OpenAIChatCompletionMessageFunctionToolCall,
)
except ImportError:
from openai.types.chat.chat_completion_message_tool_call import (
ChatCompletionMessageToolCall as OpenAIChatCompletionMessageFunctionToolCall,
)
from openai.types.chat import ( from openai.types.chat import (
ChatCompletionMessageToolCall, ChatCompletionMessageToolCall,
) )
@ -32,18 +19,6 @@ from llama_stack.models.llama.datatypes import (
ToolCall, ToolCall,
ToolDefinition, ToolDefinition,
) )
from llama_stack_api import (
URL,
GreedySamplingStrategy,
ImageContentItem,
JsonSchemaResponseFormat,
OpenAIResponseFormatParam,
SamplingParams,
TextContentItem,
TopKSamplingStrategy,
TopPSamplingStrategy,
_URLOrData,
)
logger = get_logger(name=__name__, category="providers::utils") logger = get_logger(name=__name__, category="providers::utils")
@ -73,42 +48,6 @@ class OpenAICompatCompletionResponse(BaseModel):
choices: list[OpenAICompatCompletionChoice] choices: list[OpenAICompatCompletionChoice]
def get_sampling_strategy_options(params: SamplingParams) -> dict:
options = {}
if isinstance(params.strategy, GreedySamplingStrategy):
options["temperature"] = 0.0
elif isinstance(params.strategy, TopPSamplingStrategy):
if params.strategy.temperature is not None:
options["temperature"] = params.strategy.temperature
if params.strategy.top_p is not None:
options["top_p"] = params.strategy.top_p
elif isinstance(params.strategy, TopKSamplingStrategy):
options["top_k"] = params.strategy.top_k
else:
raise ValueError(f"Unsupported sampling strategy: {params.strategy}")
return options
def get_sampling_options(params: SamplingParams | None) -> dict:
if not params:
return {}
options = {}
if params:
options.update(get_sampling_strategy_options(params))
if params.max_tokens:
options["max_tokens"] = params.max_tokens
if params.repetition_penalty is not None and params.repetition_penalty != 1.0:
options["repeat_penalty"] = params.repetition_penalty
if params.stop is not None:
options["stop"] = params.stop
return options
def text_from_choice(choice) -> str: def text_from_choice(choice) -> str:
if hasattr(choice, "delta") and choice.delta: if hasattr(choice, "delta") and choice.delta:
return choice.delta.content # type: ignore[no-any-return] # external OpenAI types lack precise annotations return choice.delta.content # type: ignore[no-any-return] # external OpenAI types lack precise annotations
@ -253,154 +192,6 @@ def convert_tooldef_to_openai_tool(tool: ToolDefinition) -> dict:
return out return out
def _convert_stop_reason_to_openai_finish_reason(stop_reason: StopReason) -> str:
"""
Convert a StopReason to an OpenAI chat completion finish_reason.
"""
return {
StopReason.end_of_turn: "stop",
StopReason.end_of_message: "tool_calls",
StopReason.out_of_tokens: "length",
}.get(stop_reason, "stop")
def _convert_openai_finish_reason(finish_reason: str) -> StopReason:
"""
Convert an OpenAI chat completion finish_reason to a StopReason.
finish_reason: Literal["stop", "length", "tool_calls", ...]
- stop: model hit a natural stop point or a provided stop sequence
- length: maximum number of tokens specified in the request was reached
- tool_calls: model called a tool
->
class StopReason(Enum):
end_of_turn = "end_of_turn"
end_of_message = "end_of_message"
out_of_tokens = "out_of_tokens"
"""
# TODO(mf): are end_of_turn and end_of_message semantics correct?
return {
"stop": StopReason.end_of_turn,
"length": StopReason.out_of_tokens,
"tool_calls": StopReason.end_of_message,
}.get(finish_reason, StopReason.end_of_turn)
def _convert_openai_request_tools(tools: list[dict[str, Any]] | None = None) -> list[ToolDefinition]:
lls_tools: list[ToolDefinition] = []
if not tools:
return lls_tools
for tool in tools:
tool_fn = tool.get("function", {})
tool_name = tool_fn.get("name", None)
tool_desc = tool_fn.get("description", None)
tool_params = tool_fn.get("parameters", None)
lls_tool = ToolDefinition(
tool_name=tool_name,
description=tool_desc,
input_schema=tool_params, # Pass through entire JSON Schema
)
lls_tools.append(lls_tool)
return lls_tools
def _convert_openai_request_response_format(
response_format: OpenAIResponseFormatParam | None = None,
):
if not response_format:
return None
# response_format can be a dict or a pydantic model
response_format_dict = dict(response_format) # type: ignore[arg-type] # OpenAIResponseFormatParam union needs dict conversion
if response_format_dict.get("type", "") == "json_schema":
return JsonSchemaResponseFormat(
type="json_schema", # type: ignore[arg-type] # Literal["json_schema"] incompatible with expected type
json_schema=response_format_dict.get("json_schema", {}).get("schema", ""),
)
return None
def _convert_openai_tool_calls(
tool_calls: list[OpenAIChatCompletionMessageFunctionToolCall],
) -> list[ToolCall]:
"""
Convert an OpenAI ChatCompletionMessageToolCall list into a list of ToolCall.
OpenAI ChatCompletionMessageToolCall:
id: str
function: Function
type: Literal["function"]
OpenAI Function:
arguments: str
name: str
->
ToolCall:
call_id: str
tool_name: str
arguments: Dict[str, ...]
"""
if not tool_calls:
return [] # CompletionMessage tool_calls is not optional
return [
ToolCall(
call_id=call.id,
tool_name=call.function.name,
arguments=call.function.arguments,
)
for call in tool_calls
]
def _convert_openai_sampling_params(
max_tokens: int | None = None,
temperature: float | None = None,
top_p: float | None = None,
) -> SamplingParams:
sampling_params = SamplingParams()
if max_tokens:
sampling_params.max_tokens = max_tokens
# Map an explicit temperature of 0 to greedy sampling
if temperature == 0:
sampling_params.strategy = GreedySamplingStrategy()
else:
# OpenAI defaults to 1.0 for temperature and top_p if unset
if temperature is None:
temperature = 1.0
if top_p is None:
top_p = 1.0
sampling_params.strategy = TopPSamplingStrategy(temperature=temperature, top_p=top_p) # type: ignore[assignment] # SamplingParams.strategy union accepts this type
return sampling_params
def openai_content_to_content(content: str | Iterable[OpenAIChatCompletionContentPartParam] | None):
if content is None:
return ""
if isinstance(content, str):
return content
elif isinstance(content, list):
return [openai_content_to_content(c) for c in content]
elif hasattr(content, "type"):
if content.type == "text":
return TextContentItem(type="text", text=content.text) # type: ignore[attr-defined] # Iterable narrowed by hasattr check but mypy doesn't track
elif content.type == "image_url":
return ImageContentItem(type="image", image=_URLOrData(url=URL(uri=content.image_url.url))) # type: ignore[attr-defined] # Iterable narrowed by hasattr check but mypy doesn't track
else:
raise ValueError(f"Unknown content type: {content.type}")
else:
raise ValueError(f"Unknown content type: {content}")
async def prepare_openai_completion_params(**params): async def prepare_openai_completion_params(**params):
async def _prepare_value(value: Any) -> Any: async def _prepare_value(value: Any) -> Any:
new_value = value new_value = value


@ -213,6 +213,19 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
return api_key return api_key
def _validate_model_allowed(self, provider_model_id: str) -> None:
"""
Validate that the model is in the allowed_models list if configured.
:param provider_model_id: The provider-specific model ID to validate
:raises ValueError: If the model is not in the allowed_models list
"""
if self.config.allowed_models is not None and provider_model_id not in self.config.allowed_models:
raise ValueError(
f"Model '{provider_model_id}' is not in the allowed models list. "
f"Allowed models: {self.config.allowed_models}"
)
async def _get_provider_model_id(self, model: str) -> str: async def _get_provider_model_id(self, model: str) -> str:
""" """
Get the provider-specific model ID from the model store. Get the provider-specific model ID from the model store.
@ -259,8 +272,11 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
Direct OpenAI completion API call. Direct OpenAI completion API call.
""" """
# TODO: fix openai_completion to return type compatible with OpenAI's API response # TODO: fix openai_completion to return type compatible with OpenAI's API response
provider_model_id = await self._get_provider_model_id(params.model)
self._validate_model_allowed(provider_model_id)
completion_kwargs = await prepare_openai_completion_params( completion_kwargs = await prepare_openai_completion_params(
model=await self._get_provider_model_id(params.model), model=provider_model_id,
prompt=params.prompt, prompt=params.prompt,
best_of=params.best_of, best_of=params.best_of,
echo=params.echo, echo=params.echo,
@ -292,6 +308,9 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
""" """
Direct OpenAI chat completion API call. Direct OpenAI chat completion API call.
""" """
provider_model_id = await self._get_provider_model_id(params.model)
self._validate_model_allowed(provider_model_id)
messages = params.messages messages = params.messages
if self.download_images: if self.download_images:
@ -313,7 +332,7 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
messages = [await _localize_image_url(m) for m in messages] messages = [await _localize_image_url(m) for m in messages]
request_params = await prepare_openai_completion_params( request_params = await prepare_openai_completion_params(
model=await self._get_provider_model_id(params.model), model=provider_model_id,
messages=messages, messages=messages,
frequency_penalty=params.frequency_penalty, frequency_penalty=params.frequency_penalty,
function_call=params.function_call, function_call=params.function_call,
@ -351,10 +370,13 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
""" """
Direct OpenAI embeddings API call. Direct OpenAI embeddings API call.
""" """
provider_model_id = await self._get_provider_model_id(params.model)
self._validate_model_allowed(provider_model_id)
# Build request params conditionally to avoid NotGiven/Omit type mismatch # Build request params conditionally to avoid NotGiven/Omit type mismatch
# The OpenAI SDK uses Omit in signatures but NOT_GIVEN has type NotGiven # The OpenAI SDK uses Omit in signatures but NOT_GIVEN has type NotGiven
request_params: dict[str, Any] = { request_params: dict[str, Any] = {
"model": await self._get_provider_model_id(params.model), "model": provider_model_id,
"input": params.input, "input": params.input,
} }
if params.encoding_format is not None: if params.encoding_format is not None:
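A standalone sketch of the `allowed_models` gate added above, with the mixin and its config flattened into a plain function for illustration:

```python
def validate_model_allowed(provider_model_id: str, allowed_models: list[str] | None) -> None:
    if allowed_models is not None and provider_model_id not in allowed_models:
        raise ValueError(
            f"Model '{provider_model_id}' is not in the allowed models list. "
            f"Allowed models: {allowed_models}"
        )


validate_model_allowed("llama3.2:3b-instruct-fp16", None)  # no allow-list configured: passes
validate_model_allowed("llama3.2:3b-instruct-fp16", ["llama3.2:3b-instruct-fp16"])  # listed: passes
# validate_model_allowed("gpt-4o", ["llama3.2:3b-instruct-fp16"])  # would raise ValueError
```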


@ -11,7 +11,7 @@
from typing import Annotated, Any, Literal, Protocol, runtime_checkable from typing import Annotated, Any, Literal, Protocol, runtime_checkable
from fastapi import Body, Query from fastapi import Body, Query
from pydantic import BaseModel, Field from pydantic import BaseModel, Field, field_validator
from llama_stack_api.common.tracing import telemetry_traceable from llama_stack_api.common.tracing import telemetry_traceable
from llama_stack_api.inference import InterleavedContent from llama_stack_api.inference import InterleavedContent
@ -372,6 +372,65 @@ VectorStoreFileStatus = Literal["completed"] | Literal["in_progress"] | Literal[
register_schema(VectorStoreFileStatus, name="VectorStoreFileStatus") register_schema(VectorStoreFileStatus, name="VectorStoreFileStatus")
# VectorStoreFileAttributes type with OpenAPI constraints
VectorStoreFileAttributes = Annotated[
dict[str, Annotated[str, Field(max_length=512)] | float | bool],
Field(
max_length=16,
json_schema_extra={
"propertyNames": {"type": "string", "maxLength": 64},
"x-oaiTypeLabel": "map",
},
description=(
"Set of 16 key-value pairs that can be attached to an object. This can be "
"useful for storing additional information about the object in a structured "
"format, and querying for objects via API or the dashboard. Keys are strings "
"with a maximum length of 64 characters. Values are strings with a maximum "
"length of 512 characters, booleans, or numbers."
),
),
]
def _sanitize_vector_store_attributes(metadata: dict[str, Any] | None) -> dict[str, str | float | bool]:
"""
Sanitize metadata to VectorStoreFileAttributes spec (max 16 properties, primitives only).
Converts dict[str, Any] to dict[str, str | float | bool]:
- Preserves: str (truncated to 512 chars), bool, int/float (as float)
- Converts: list -> comma-separated string
- Filters: dict, None, other types
- Enforces: max 16 properties, max 64 char keys, max 512 char string values
"""
if not metadata:
return {}
sanitized: dict[str, str | float | bool] = {}
for key, value in metadata.items():
# Enforce max 16 properties
if len(sanitized) >= 16:
break
# Enforce max 64 char keys
if len(key) > 64:
continue
# Convert to supported primitive types
if isinstance(value, bool):
sanitized[key] = value
elif isinstance(value, int | float):
sanitized[key] = float(value)
elif isinstance(value, str):
# Enforce max 512 char string values
sanitized[key] = value[:512] if len(value) > 512 else value
elif isinstance(value, list):
# Convert lists to comma-separated strings (max 512 chars)
list_str = ", ".join(str(item) for item in value)
sanitized[key] = list_str[:512] if len(list_str) > 512 else list_str
return sanitized
@json_schema_type @json_schema_type
class VectorStoreFileObject(BaseModel): class VectorStoreFileObject(BaseModel):
"""OpenAI Vector Store File object. """OpenAI Vector Store File object.
@ -389,7 +448,7 @@ class VectorStoreFileObject(BaseModel):
id: str id: str
object: str = "vector_store.file" object: str = "vector_store.file"
attributes: dict[str, Any] = Field(default_factory=dict) attributes: VectorStoreFileAttributes = Field(default_factory=dict)
chunking_strategy: VectorStoreChunkingStrategy chunking_strategy: VectorStoreChunkingStrategy
created_at: int created_at: int
last_error: VectorStoreFileLastError | None = None last_error: VectorStoreFileLastError | None = None
@ -397,6 +456,12 @@ class VectorStoreFileObject(BaseModel):
usage_bytes: int = 0 usage_bytes: int = 0
vector_store_id: str vector_store_id: str
@field_validator("attributes", mode="before")
@classmethod
def _validate_attributes(cls, v: dict[str, Any] | None) -> dict[str, str | float | bool]:
"""Sanitize attributes to match VectorStoreFileAttributes OpenAPI spec."""
return _sanitize_vector_store_attributes(v)
@json_schema_type @json_schema_type
class VectorStoreListFilesResponse(BaseModel): class VectorStoreListFilesResponse(BaseModel):
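To make the sanitization rules concrete, here is a hypothetical metadata dict and the shape it takes after validation; the values follow the docstring above, and this is a sketch rather than an executable doctest against the real import path:

```python
metadata = {
    "title": "Quarterly report",   # str kept (truncated at 512 chars if longer)
    "page_count": 12,              # int -> 12.0
    "reviewed": True,              # bool preserved
    "tags": ["finance", "q3"],     # list -> "finance, q3"
    "owner": {"team": "fin"},      # dict -> dropped
    "notes": None,                 # None -> dropped
}

expected_attributes = {
    "title": "Quarterly report",
    "page_count": 12.0,
    "reviewed": True,
    "tags": "finance, q3",
}
```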


@ -211,3 +211,23 @@ def test_asymmetric_embeddings(llama_stack_client, embedding_model_id):
assert query_response.embeddings is not None assert query_response.embeddings is not None
``` ```
## TypeScript Client Replays
TypeScript SDK tests can run alongside Python tests when testing against `server:<config>` stacks. To enable them, set `TS_CLIENT_PATH` to either a local checkout path or a published npm version of `llama-stack-client-typescript`:
```bash
# Use published npm package (responses suite)
TS_CLIENT_PATH=^0.3.2 scripts/integration-tests.sh --stack-config server:ci-tests --suite responses --setup gpt
# Use local checkout from ~/.cache (recommended for development)
git clone https://github.com/llamastack/llama-stack-client-typescript.git ~/.cache/llama-stack-client-typescript
TS_CLIENT_PATH=~/.cache/llama-stack-client-typescript scripts/integration-tests.sh --stack-config server:ci-tests --suite responses --setup gpt
# Run base suite with TypeScript tests
TS_CLIENT_PATH=~/.cache/llama-stack-client-typescript scripts/integration-tests.sh --stack-config server:ci-tests --suite base --setup ollama
```
TypeScript tests run immediately after Python tests pass, using the same replay fixtures. The mapping between Python suites/setups and TypeScript test files is defined in `tests/integration/client-typescript/suites.json`.
If `TS_CLIENT_PATH` is unset, TypeScript tests are skipped entirely.
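The mapping file is a JSON array of `{suite, setup, files}` entries, which are the fields `run-tests.js` reads. A hypothetical example of its contents, shown here as an equivalent Python literal (the real suite/setup names and test file paths may differ):

```python
suites = [
    {"suite": "responses", "setup": "gpt", "files": ["__tests__/responses/basic.test.ts"]},
    {"suite": "base", "setup": "ollama", "files": ["__tests__/inference/chat-completions.test.ts"]},
]
```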


@ -516,169 +516,3 @@ def test_response_with_instructions(openai_client, client_with_models, text_mode
# Verify instructions from previous response was not carried over to the next response # Verify instructions from previous response was not carried over to the next response
assert response_with_instructions2.instructions == instructions2 assert response_with_instructions2.instructions == instructions2
@pytest.mark.skip(reason="Tool calling is not reliable.")
def test_max_tool_calls_with_function_tools(openai_client, client_with_models, text_model_id):
"""Test handling of max_tool_calls with function tools in responses."""
if isinstance(client_with_models, LlamaStackAsLibraryClient):
pytest.skip("OpenAI responses are not supported when testing with library client yet.")
client = openai_client
max_tool_calls = 1
tools = [
{
"type": "function",
"name": "get_weather",
"description": "Get weather information for a specified location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city name (e.g., 'New York', 'London')",
},
},
},
},
{
"type": "function",
"name": "get_time",
"description": "Get current time for a specified location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city name (e.g., 'New York', 'London')",
},
},
},
},
]
# First create a response that triggers function tools
response = client.responses.create(
model=text_model_id,
input="Can you tell me the weather in Paris and the current time?",
tools=tools,
stream=False,
max_tool_calls=max_tool_calls,
)
# Verify we got two function calls and that the max_tool_calls do not affect function tools
assert len(response.output) == 2
assert response.output[0].type == "function_call"
assert response.output[0].name == "get_weather"
assert response.output[0].status == "completed"
assert response.output[1].type == "function_call"
assert response.output[1].name == "get_time"
assert response.output[0].status == "completed"
# Verify we have a valid max_tool_calls field
assert response.max_tool_calls == max_tool_calls
def test_max_tool_calls_invalid(openai_client, client_with_models, text_model_id):
"""Test handling of invalid max_tool_calls in responses."""
if isinstance(client_with_models, LlamaStackAsLibraryClient):
pytest.skip("OpenAI responses are not supported when testing with library client yet.")
client = openai_client
input = "Search for today's top technology news."
invalid_max_tool_calls = 0
tools = [
{"type": "web_search"},
]
# Create a response with an invalid max_tool_calls value i.e. 0
# Handle ValueError from LLS and BadRequestError from OpenAI client
with pytest.raises((ValueError, BadRequestError)) as excinfo:
client.responses.create(
model=text_model_id,
input=input,
tools=tools,
stream=False,
max_tool_calls=invalid_max_tool_calls,
)
error_message = str(excinfo.value)
assert f"Invalid max_tool_calls={invalid_max_tool_calls}; should be >= 1" in error_message, (
f"Expected error message about invalid max_tool_calls, got: {error_message}"
)
def test_max_tool_calls_with_builtin_tools(openai_client, client_with_models, text_model_id):
"""Test handling of max_tool_calls with built-in tools in responses."""
if isinstance(client_with_models, LlamaStackAsLibraryClient):
pytest.skip("OpenAI responses are not supported when testing with library client yet.")
client = openai_client
input = "Search for today's top technology and a positive news story. You MUST make exactly two separate web search calls."
max_tool_calls = [1, 5]
tools = [
{"type": "web_search"},
]
# First create a response that triggers web_search tools without max_tool_calls
response = client.responses.create(
model=text_model_id,
input=input,
tools=tools,
stream=False,
)
# Verify we got two web search calls followed by a message
assert len(response.output) == 3
assert response.output[0].type == "web_search_call"
assert response.output[0].status == "completed"
assert response.output[1].type == "web_search_call"
assert response.output[1].status == "completed"
assert response.output[2].type == "message"
assert response.output[2].status == "completed"
assert response.output[2].role == "assistant"
# Next create a response that triggers web_search tools with max_tool_calls set to 1
response_2 = client.responses.create(
model=text_model_id,
input=input,
tools=tools,
stream=False,
max_tool_calls=max_tool_calls[0],
)
# Verify we got one web search tool call followed by a message
assert len(response_2.output) == 2
assert response_2.output[0].type == "web_search_call"
assert response_2.output[0].status == "completed"
assert response_2.output[1].type == "message"
assert response_2.output[1].status == "completed"
assert response_2.output[1].role == "assistant"
# Verify we have a valid max_tool_calls field
assert response_2.max_tool_calls == max_tool_calls[0]
# Finally create a response that triggers web_search tools with max_tool_calls set to 5
response_3 = client.responses.create(
model=text_model_id,
input=input,
tools=tools,
stream=False,
max_tool_calls=max_tool_calls[1],
)
# Verify we got two web search calls followed by a message
assert len(response_3.output) == 3
assert response_3.output[0].type == "web_search_call"
assert response_3.output[0].status == "completed"
assert response_3.output[1].type == "web_search_call"
assert response_3.output[1].status == "completed"
assert response_3.output[2].type == "message"
assert response_3.output[2].status == "completed"
assert response_3.output[2].role == "assistant"
# Verify we have a valid max_tool_calls field
assert response_3.max_tool_calls == max_tool_calls[1]


@ -0,0 +1,104 @@
// Copyright (c) Meta Platforms, Inc. and affiliates.
// All rights reserved.
//
// This source code is licensed under the terms described in the LICENSE file in
// the root directory of this source tree.
/**
* Integration tests for Inference API (Chat Completions).
* Ported from: llama-stack/tests/integration/inference/test_openai_completion.py
*
* IMPORTANT: Test cases must match EXACTLY with Python tests to use recorded API responses.
*/
import { createTestClient, requireTextModel } from '../setup';
describe('Inference API - Chat Completions', () => {
// Test cases matching llama-stack/tests/integration/test_cases/inference/chat_completion.json
const chatCompletionTestCases = [
{
id: 'non_streaming_01',
question: 'Which planet do humans live on?',
expected: 'earth',
testId:
'tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[client_with_models-txt=ollama/llama3.2:3b-instruct-fp16-inference:chat_completion:non_streaming_01]',
},
{
id: 'non_streaming_02',
question: 'Which planet has rings around it with a name starting with letter S?',
expected: 'saturn',
testId:
'tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[client_with_models-txt=ollama/llama3.2:3b-instruct-fp16-inference:chat_completion:non_streaming_02]',
},
];
const streamingTestCases = [
{
id: 'streaming_01',
question: "What's the name of the Sun in latin?",
expected: 'sol',
testId:
'tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=ollama/llama3.2:3b-instruct-fp16-inference:chat_completion:streaming_01]',
},
{
id: 'streaming_02',
question: 'What is the name of the US captial?',
expected: 'washington',
testId:
'tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=ollama/llama3.2:3b-instruct-fp16-inference:chat_completion:streaming_02]',
},
];
test.each(chatCompletionTestCases)(
'chat completion non-streaming: $id',
async ({ question, expected, testId }) => {
const client = createTestClient(testId);
const textModel = requireTextModel();
const response = await client.chat.completions.create({
model: textModel,
messages: [
{
role: 'user',
content: question,
},
],
stream: false,
});
// Non-streaming responses have choices with message property
const choice = response.choices[0];
expect(choice).toBeDefined();
if (!choice || !('message' in choice)) {
throw new Error('Expected non-streaming response with message');
}
const content = choice.message.content;
expect(content).toBeDefined();
const messageContent = typeof content === 'string' ? content.toLowerCase().trim() : '';
expect(messageContent.length).toBeGreaterThan(0);
expect(messageContent).toContain(expected.toLowerCase());
},
);
test.each(streamingTestCases)('chat completion streaming: $id', async ({ question, expected, testId }) => {
const client = createTestClient(testId);
const textModel = requireTextModel();
const stream = await client.chat.completions.create({
model: textModel,
messages: [{ role: 'user', content: question }],
stream: true,
});
const streamedContent: string[] = [];
for await (const chunk of stream) {
if (chunk.choices && chunk.choices.length > 0 && chunk.choices[0]?.delta?.content) {
streamedContent.push(chunk.choices[0].delta.content);
}
}
expect(streamedContent.length).toBeGreaterThan(0);
const fullContent = streamedContent.join('').toLowerCase().trim();
expect(fullContent).toContain(expected.toLowerCase());
});
});


@ -0,0 +1,132 @@
// Copyright (c) Meta Platforms, Inc. and affiliates.
// All rights reserved.
//
// This source code is licensed under the terms described in the LICENSE file in
// the root directory of this source tree.
/**
* Integration tests for Responses API.
* Ported from: llama-stack/tests/integration/responses/test_basic_responses.py
*
* IMPORTANT: Test cases and IDs must match EXACTLY with Python tests to use recorded API responses.
*/
import { createTestClient, requireTextModel, getResponseOutputText } from '../setup';
describe('Responses API - Basic', () => {
// Test cases matching llama-stack/tests/integration/responses/fixtures/test_cases.py
const basicTestCases = [
{
id: 'earth',
input: 'Which planet do humans live on?',
expected: 'earth',
// Use client_with_models fixture to match non-streaming recordings
testId:
'tests/integration/responses/test_basic_responses.py::test_response_non_streaming_basic[client_with_models-txt=openai/gpt-4o-earth]',
},
{
id: 'saturn',
input: 'Which planet has rings around it with a name starting with letter S?',
expected: 'saturn',
testId:
'tests/integration/responses/test_basic_responses.py::test_response_non_streaming_basic[client_with_models-txt=openai/gpt-4o-saturn]',
},
];
test.each(basicTestCases)('non-streaming basic response: $id', async ({ input, expected, testId }) => {
// Create client with test_id for all requests
const client = createTestClient(testId);
const textModel = requireTextModel();
// Create a response
const response = await client.responses.create({
model: textModel,
input,
stream: false,
});
// Verify response has content
const outputText = getResponseOutputText(response).toLowerCase().trim();
expect(outputText.length).toBeGreaterThan(0);
expect(outputText).toContain(expected.toLowerCase());
// Verify usage is reported
expect(response.usage).toBeDefined();
expect(response.usage!.input_tokens).toBeGreaterThan(0);
expect(response.usage!.output_tokens).toBeGreaterThan(0);
expect(response.usage!.total_tokens).toBe(response.usage!.input_tokens + response.usage!.output_tokens);
// Verify stored response matches
const retrievedResponse = await client.responses.retrieve(response.id);
expect(getResponseOutputText(retrievedResponse)).toBe(getResponseOutputText(response));
// Test follow-up with previous_response_id
const nextResponse = await client.responses.create({
model: textModel,
input: 'Repeat your previous response in all caps.',
previous_response_id: response.id,
});
const nextOutputText = getResponseOutputText(nextResponse).trim();
expect(nextOutputText).toContain(expected.toUpperCase());
});
test.each(basicTestCases)('streaming basic response: $id', async ({ input, expected, testId }) => {
// Modify test_id for streaming variant
const streamingTestId = testId.replace(
'test_response_non_streaming_basic',
'test_response_streaming_basic',
);
const client = createTestClient(streamingTestId);
const textModel = requireTextModel();
// Create a streaming response
const stream = await client.responses.create({
model: textModel,
input,
stream: true,
});
const events: any[] = [];
let responseId = '';
for await (const chunk of stream) {
events.push(chunk);
if (chunk.type === 'response.created') {
// Verify response.created is the first event
expect(events.length).toBe(1);
expect(chunk.response.status).toBe('in_progress');
responseId = chunk.response.id;
} else if (chunk.type === 'response.completed') {
// Verify response.completed comes after response.created
expect(events.length).toBeGreaterThanOrEqual(2);
expect(chunk.response.status).toBe('completed');
expect(chunk.response.id).toBe(responseId);
// Verify content quality
const outputText = getResponseOutputText(chunk.response).toLowerCase().trim();
expect(outputText.length).toBeGreaterThan(0);
expect(outputText).toContain(expected.toLowerCase());
// Verify usage is reported
expect(chunk.response.usage).toBeDefined();
expect(chunk.response.usage!.input_tokens).toBeGreaterThan(0);
expect(chunk.response.usage!.output_tokens).toBeGreaterThan(0);
expect(chunk.response.usage!.total_tokens).toBe(
chunk.response.usage!.input_tokens + chunk.response.usage!.output_tokens,
);
}
}
// Verify we got both events
expect(events.length).toBeGreaterThanOrEqual(2);
const firstEvent = events[0];
const lastEvent = events[events.length - 1];
expect(firstEvent.type).toBe('response.created');
expect(lastEvent.type).toBe('response.completed');
// Verify stored response matches streamed response
const retrievedResponse = await client.responses.retrieve(responseId);
expect(getResponseOutputText(retrievedResponse)).toBe(getResponseOutputText(lastEvent.response));
});
});


@ -0,0 +1,31 @@
// Copyright (c) Meta Platforms, Inc. and affiliates.
// All rights reserved.
//
// This source code is licensed under the terms described in the LICENSE file in
// the root directory of this source tree.
/** @type {import('ts-jest').JestConfigWithTsJest} */
module.exports = {
preset: 'ts-jest/presets/default-esm',
testEnvironment: 'node',
extensionsToTreatAsEsm: ['.ts'],
moduleNameMapper: {
'^(\\.{1,2}/.*)\\.js$': '$1',
},
transform: {
'^.+\\.tsx?$': [
'ts-jest',
{
useESM: true,
tsconfig: {
module: 'ES2022',
moduleResolution: 'bundler',
},
},
],
},
testMatch: ['<rootDir>/__tests__/**/*.test.ts'],
setupFilesAfterEnv: ['<rootDir>/setup.ts'],
testTimeout: 60000, // 60 seconds (integration tests can be slow)
watchman: false, // Disable watchman to avoid permission issues
};

File diff suppressed because it is too large


@ -0,0 +1,18 @@
{
"name": "llama-stack-typescript-integration-tests",
"version": "0.0.1",
"private": true,
"description": "TypeScript client integration tests for Llama Stack",
"scripts": {
"test": "node run-tests.js"
},
"devDependencies": {
"@swc/core": "^1.3.102",
"@swc/jest": "^0.2.29",
"@types/jest": "^29.4.0",
"@types/node": "^20.0.0",
"jest": "^29.4.0",
"ts-jest": "^29.1.0",
"typescript": "^5.0.0"
}
}


@ -0,0 +1,63 @@
#!/usr/bin/env node
// Copyright (c) Meta Platforms, Inc. and affiliates.
// All rights reserved.
//
// This source code is licensed under the terms described in the LICENSE file in
// the root directory of this source tree.
/**
* Test runner that finds and executes TypeScript tests based on suite/setup mapping.
* Called by integration-tests.sh via npm test.
*/
const fs = require('fs');
const path = require('path');
const { execSync } = require('child_process');
const suite = process.env.LLAMA_STACK_TEST_SUITE;
const setup = process.env.LLAMA_STACK_TEST_SETUP || '';
if (!suite) {
console.error('Error: LLAMA_STACK_TEST_SUITE environment variable is required');
process.exit(1);
}
// Read suites.json to find matching test files
const suitesPath = path.join(__dirname, 'suites.json');
if (!fs.existsSync(suitesPath)) {
console.log(`No TypeScript tests configured (${suitesPath} not found)`);
process.exit(0);
}
const suites = JSON.parse(fs.readFileSync(suitesPath, 'utf-8'));
// Find matching entry
let testFiles = [];
for (const entry of suites) {
if (entry.suite !== suite) {
continue;
}
const entrySetup = entry.setup || '';
if (entrySetup && entrySetup !== setup) {
continue;
}
testFiles = entry.files || [];
break;
}
if (testFiles.length === 0) {
console.log(`No TypeScript integration tests mapped for suite ${suite} (setup ${setup})`);
process.exit(0);
}
console.log(`Running TypeScript tests for suite ${suite} (setup ${setup}): ${testFiles.join(', ')}`);
// Run Jest with the mapped test files
try {
execSync(`npx jest --config jest.integration.config.js ${testFiles.join(' ')}`, {
stdio: 'inherit',
cwd: __dirname,
});
} catch (error) {
process.exit(error.status || 1);
}
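For reference, the suite/setup matching that this runner applies to suites.json can be restated with explicit types. This is an illustrative sketch only, not part of the change; SuiteEntry simply names the fields (suite, setup, files) the script reads:

// Typed restatement of the matching rule in run-tests.js (illustrative only).
interface SuiteEntry {
  suite: string;     // integration suite name, e.g. "responses"
  setup?: string;    // optional setup name; empty or absent matches any setup
  files?: string[];  // Jest test files to run for this suite/setup pair
}

function matchTestFiles(entries: SuiteEntry[], suite: string, setup: string): string[] {
  for (const entry of entries) {
    if (entry.suite !== suite) continue;
    const entrySetup = entry.setup ?? '';
    if (entrySetup && entrySetup !== setup) continue;
    return entry.files ?? [];
  }
  return [];
}

// Example: matchTestFiles(suites, 'responses', 'gpt') -> ['__tests__/responses.test.ts']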

View file

@@ -0,0 +1,162 @@
// Copyright (c) Meta Platforms, Inc. and affiliates.
// All rights reserved.
//
// This source code is licensed under the terms described in the LICENSE file in
// the root directory of this source tree.
/**
* Global setup for integration tests.
* This file mimics pytest's fixture system by providing shared test configuration.
*/
import LlamaStackClient from 'llama-stack-client';
/**
* Load test configuration from the Python setup system.
* This reads setup definitions from tests/integration/suites.py via get_setup_env.py.
*/
function loadTestConfig() {
const baseURL = process.env['TEST_API_BASE_URL'];
const setupName = process.env['LLAMA_STACK_TEST_SETUP'];
const textModel = process.env['LLAMA_STACK_TEST_TEXT_MODEL'];
const embeddingModel = process.env['LLAMA_STACK_TEST_EMBEDDING_MODEL'];
if (!baseURL) {
throw new Error(
'TEST_API_BASE_URL is required for integration tests. ' +
'Run tests using: ./scripts/integration-test.sh',
);
}
return {
baseURL,
textModel,
embeddingModel,
setupName,
};
}
// Read configuration from environment variables (set by scripts/integration-test.sh)
export const TEST_CONFIG = loadTestConfig();
// Validate required configuration
beforeAll(() => {
console.log('\n=== Integration Test Configuration ===');
console.log(`Base URL: ${TEST_CONFIG.baseURL}`);
console.log(`Setup: ${TEST_CONFIG.setupName || 'NOT SET'}`);
console.log(
`Text Model: ${TEST_CONFIG.textModel || 'NOT SET - tests requiring text model will be skipped'}`,
);
console.log(
`Embedding Model: ${
TEST_CONFIG.embeddingModel || 'NOT SET - tests requiring embedding model will be skipped'
}`,
);
console.log('=====================================\n');
});
/**
* Create a client instance for integration tests.
* Mimics pytest's `llama_stack_client` fixture.
*
* @param testId - Test ID to send in X-LlamaStack-Provider-Data header for replay mode.
* Format: "tests/integration/responses/test_basic_responses.py::test_name[params]"
*/
export function createTestClient(testId?: string): LlamaStackClient {
const headers: Record<string, string> = {};
// In server mode with replay, send test ID for recording isolation
if (process.env['LLAMA_STACK_TEST_STACK_CONFIG_TYPE'] === 'server' && testId) {
headers['X-LlamaStack-Provider-Data'] = JSON.stringify({
__test_id: testId,
});
}
return new LlamaStackClient({
baseURL: TEST_CONFIG.baseURL,
timeout: 60000, // 60 seconds
defaultHeaders: headers,
});
}
/**
* Skip test if required model is not configured.
* Mimics pytest's `skip_if_no_model` autouse fixture.
*/
export function skipIfNoModel(modelType: 'text' | 'embedding'): typeof test {
const model = modelType === 'text' ? TEST_CONFIG.textModel : TEST_CONFIG.embeddingModel;
if (!model) {
const envVar = modelType === 'text' ? 'LLAMA_STACK_TEST_TEXT_MODEL' : 'LLAMA_STACK_TEST_EMBEDDING_MODEL';
console.warn(`Skipping: ${modelType} model not configured (set ${envVar})`);
return test.skip.bind(test) as typeof test;
}
return test;
}
/**
* Get the configured text model, throwing if not set.
* Use this in tests that absolutely require a text model.
*/
export function requireTextModel(): string {
if (!TEST_CONFIG.textModel) {
throw new Error(
'LLAMA_STACK_TEST_TEXT_MODEL environment variable is required. ' +
'Run tests using: ./scripts/integration-test.sh',
);
}
return TEST_CONFIG.textModel;
}
/**
* Get the configured embedding model, throwing if not set.
* Use this in tests that absolutely require an embedding model.
*/
export function requireEmbeddingModel(): string {
if (!TEST_CONFIG.embeddingModel) {
throw new Error(
'LLAMA_STACK_TEST_EMBEDDING_MODEL environment variable is required. ' +
'Run tests using: ./scripts/integration-test.sh',
);
}
return TEST_CONFIG.embeddingModel;
}
/**
* Extracts aggregated text output from a ResponseObject.
* This concatenates all text content from the response's output array.
*
* Copied from llama-stack-client's response-helpers until it's available in published version.
*/
export function getResponseOutputText(response: any): string {
const pieces: string[] = [];
for (const output of response.output ?? []) {
if (!output || output.type !== 'message') {
continue;
}
const content = output.content;
if (typeof content === 'string') {
pieces.push(content);
continue;
}
if (!Array.isArray(content)) {
continue;
}
for (const item of content) {
if (typeof item === 'string') {
pieces.push(item);
continue;
}
if (item && item.type === 'output_text' && 'text' in item && typeof item.text === 'string') {
pieces.push(item.text);
}
}
}
return pieces.join('');
}
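To show how these helpers are meant to be used together, here is a hypothetical test file. It assumes a __tests__/ location (per the Jest config above) and that the client exposes responses.create in the same style as the responses.retrieve call used earlier; the prompt and expectation are illustrative only:

// Hypothetical __tests__/example.test.ts -- a usage sketch, not one of the mapped suites.
import { createTestClient, requireTextModel, getResponseOutputText } from '../setup';

describe('example: non-streaming response', () => {
  test('returns non-empty text for a simple prompt', async () => {
    // Forward the test ID so server/replay mode can isolate recordings per test.
    const client = createTestClient(
      'tests/integration/client-typescript/__tests__/example.test.ts::returns non-empty text',
    );
    const response = await client.responses.create({
      model: requireTextModel(),
      input: 'Answer with one word: what color is a clear daytime sky?',
    });
    expect(getResponseOutputText(response).toLowerCase()).toContain('blue');
  });
});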

View file

@@ -0,0 +1,12 @@
[
{
"suite": "responses",
"setup": "gpt",
"files": ["__tests__/responses.test.ts"]
},
{
"suite": "base",
"setup": "ollama",
"files": ["__tests__/inference.test.ts"]
}
]

View file

@@ -0,0 +1,16 @@
{
"compilerOptions": {
"target": "ES2022",
"module": "ES2022",
"lib": ["ES2022"],
"moduleResolution": "bundler",
"esModuleInterop": true,
"allowSyntheticDefaultImports": true,
"strict": true,
"skipLibCheck": true,
"resolveJsonModule": true,
"types": ["jest", "node"]
},
"include": ["**/*.ts"],
"exclude": ["node_modules"]
}

View file

@@ -0,0 +1,773 @@
{
"test_id": "tests/integration/responses/test_tool_responses.py::test_max_tool_calls_with_mcp_tools[client_with_models-txt=openai/gpt-4o]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "Get the experiment ID for 'boiling_point' and get the user ID for 'charlie'"
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "get_user_id",
"description": "\n Get the user ID for a given username. This ID is needed for other operations.\n\n :param username: The username to look up\n :return: The user ID for the username\n ",
"parameters": {
"properties": {
"username": {
"title": "Username",
"type": "string"
}
},
"required": [
"username"
],
"title": "get_user_idArguments",
"type": "object"
}
}
},
{
"type": "function",
"function": {
"name": "get_user_permissions",
"description": "\n Get the permissions for a user ID. Requires a valid user ID from get_user_id.\n\n :param user_id: The user ID to check permissions for\n :return: The permissions for the user\n ",
"parameters": {
"properties": {
"user_id": {
"title": "User Id",
"type": "string"
}
},
"required": [
"user_id"
],
"title": "get_user_permissionsArguments",
"type": "object"
}
}
},
{
"type": "function",
"function": {
"name": "check_file_access",
"description": "\n Check if a user can access a specific file. Requires a valid user ID.\n\n :param user_id: The user ID to check access for\n :param filename: The filename to check access to\n :return: Whether the user can access the file (yes/no)\n ",
"parameters": {
"properties": {
"user_id": {
"title": "User Id",
"type": "string"
},
"filename": {
"title": "Filename",
"type": "string"
}
},
"required": [
"user_id",
"filename"
],
"title": "check_file_accessArguments",
"type": "object"
}
}
},
{
"type": "function",
"function": {
"name": "get_experiment_id",
"description": "\n Get the experiment ID for a given experiment name. This ID is needed to get results.\n\n :param experiment_name: The name of the experiment\n :return: The experiment ID\n ",
"parameters": {
"properties": {
"experiment_name": {
"title": "Experiment Name",
"type": "string"
}
},
"required": [
"experiment_name"
],
"title": "get_experiment_idArguments",
"type": "object"
}
}
},
{
"type": "function",
"function": {
"name": "get_experiment_results",
"description": "\n Get the results for an experiment ID. Requires a valid experiment ID from get_experiment_id.\n\n :param experiment_id: The experiment ID to get results for\n :return: The experiment results\n ",
"parameters": {
"properties": {
"experiment_id": {
"title": "Experiment Id",
"type": "string"
}
},
"required": [
"experiment_id"
],
"title": "get_experiment_resultsArguments",
"type": "object"
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1997dc007d20",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_c98e05ca17",
"usage": null,
"obfuscation": "1V9w3bXnppL"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1997dc007d20",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": "call_y8S7JKR2Qhu4Bh1uxdHRcNDg",
"function": {
"arguments": "",
"name": "get_experiment_id"
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_c98e05ca17",
"usage": null,
"obfuscation": "YEsj"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1997dc007d20",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "{\"ex",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_c98e05ca17",
"usage": null,
"obfuscation": "n"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1997dc007d20",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "perim",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_c98e05ca17",
"usage": null,
"obfuscation": "Q"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1997dc007d20",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "ent_na",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_c98e05ca17",
"usage": null,
"obfuscation": ""
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1997dc007d20",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "me\":",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_c98e05ca17",
"usage": null,
"obfuscation": "U"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1997dc007d20",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " \"boi",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_c98e05ca17",
"usage": null,
"obfuscation": ""
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1997dc007d20",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "ling_p",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_c98e05ca17",
"usage": null,
"obfuscation": ""
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1997dc007d20",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "oint",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_c98e05ca17",
"usage": null,
"obfuscation": "ha"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1997dc007d20",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "\"}",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_c98e05ca17",
"usage": null,
"obfuscation": "d5D"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1997dc007d20",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 1,
"id": "call_HELkyZOm2fzLx2CeTH3bEcS2",
"function": {
"arguments": "",
"name": "get_user_id"
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_c98e05ca17",
"usage": null,
"obfuscation": "0LbsjDcKz6"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1997dc007d20",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 1,
"id": null,
"function": {
"arguments": "{\"us",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_c98e05ca17",
"usage": null,
"obfuscation": "c"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1997dc007d20",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 1,
"id": null,
"function": {
"arguments": "ernam",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_c98e05ca17",
"usage": null,
"obfuscation": "9"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1997dc007d20",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 1,
"id": null,
"function": {
"arguments": "e\": \"c",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_c98e05ca17",
"usage": null,
"obfuscation": "7C0WFn181I3y3l"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1997dc007d20",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 1,
"id": null,
"function": {
"arguments": "harl",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_c98e05ca17",
"usage": null,
"obfuscation": "wf"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1997dc007d20",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 1,
"id": null,
"function": {
"arguments": "ie\"}",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_c98e05ca17",
"usage": null,
"obfuscation": "r"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1997dc007d20",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_c98e05ca17",
"usage": null,
"obfuscation": "FAci"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-1997dc007d20",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_c98e05ca17",
"usage": {
"completion_tokens": 51,
"prompt_tokens": 393,
"total_tokens": 444,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "6xgpRRdKjviPT"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,593 @@
{
"test_id": "tests/integration/responses/test_tool_responses.py::test_max_tool_calls_with_function_tools[openai_client-txt=openai/gpt-4o]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "Can you tell me the weather in Paris and the current time?"
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"type": "function",
"name": "get_weather",
"description": "Get weather information for a specified location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city name (e.g., 'New York', 'London')"
}
}
},
"strict": null
}
},
{
"type": "function",
"function": {
"type": "function",
"name": "get_time",
"description": "Get current time for a specified location",
"parameters": {
"type": "object",
"properties": {
"location": {
"type": "string",
"description": "The city name (e.g., 'New York', 'London')"
}
}
},
"strict": null
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-463ab0e2f291",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_b1442291a8",
"usage": null,
"obfuscation": "QmTXstGvpa8"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-463ab0e2f291",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": "call_HJMoLtHXfCzhlMQOfqIKt0n3",
"function": {
"arguments": "",
"name": "get_weather"
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_b1442291a8",
"usage": null,
"obfuscation": "iFjmkK23KL"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-463ab0e2f291",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "{\"lo",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_b1442291a8",
"usage": null,
"obfuscation": "7"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-463ab0e2f291",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "catio",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_b1442291a8",
"usage": null,
"obfuscation": "L"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-463ab0e2f291",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "n\": \"P",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_b1442291a8",
"usage": null,
"obfuscation": "THa6gWbrWhVmZ6"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-463ab0e2f291",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "aris",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_b1442291a8",
"usage": null,
"obfuscation": "eL"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-463ab0e2f291",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "\"}",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_b1442291a8",
"usage": null,
"obfuscation": "jng"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-463ab0e2f291",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 1,
"id": "call_vGKvTKZM7aALMaUw3Jas7lRg",
"function": {
"arguments": "",
"name": "get_time"
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_b1442291a8",
"usage": null,
"obfuscation": "LSailgMcgSl54"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-463ab0e2f291",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 1,
"id": null,
"function": {
"arguments": "{\"lo",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_b1442291a8",
"usage": null,
"obfuscation": "z"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-463ab0e2f291",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 1,
"id": null,
"function": {
"arguments": "catio",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_b1442291a8",
"usage": null,
"obfuscation": "4"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-463ab0e2f291",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 1,
"id": null,
"function": {
"arguments": "n\": \"P",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_b1442291a8",
"usage": null,
"obfuscation": "0engr6vRvqXTEP"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-463ab0e2f291",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 1,
"id": null,
"function": {
"arguments": "aris",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_b1442291a8",
"usage": null,
"obfuscation": "Pe"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-463ab0e2f291",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 1,
"id": null,
"function": {
"arguments": "\"}",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_b1442291a8",
"usage": null,
"obfuscation": "LU9"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-463ab0e2f291",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_b1442291a8",
"usage": null,
"obfuscation": "kD7d"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-463ab0e2f291",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_b1442291a8",
"usage": {
"completion_tokens": 44,
"prompt_tokens": 110,
"total_tokens": 154,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "R4ICoxqTqj7ZY"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}

View file

@@ -0,0 +1,773 @@
{
"test_id": "tests/integration/responses/test_tool_responses.py::test_max_tool_calls_with_mcp_tools[openai_client-txt=openai/gpt-4o]",
"request": {
"method": "POST",
"url": "https://api.openai.com/v1/v1/chat/completions",
"headers": {},
"body": {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": "Get the experiment ID for 'boiling_point' and get the user ID for 'charlie'"
}
],
"stream": true,
"stream_options": {
"include_usage": true
},
"tools": [
{
"type": "function",
"function": {
"name": "get_user_id",
"description": "\n Get the user ID for a given username. This ID is needed for other operations.\n\n :param username: The username to look up\n :return: The user ID for the username\n ",
"parameters": {
"properties": {
"username": {
"title": "Username",
"type": "string"
}
},
"required": [
"username"
],
"title": "get_user_idArguments",
"type": "object"
}
}
},
{
"type": "function",
"function": {
"name": "get_user_permissions",
"description": "\n Get the permissions for a user ID. Requires a valid user ID from get_user_id.\n\n :param user_id: The user ID to check permissions for\n :return: The permissions for the user\n ",
"parameters": {
"properties": {
"user_id": {
"title": "User Id",
"type": "string"
}
},
"required": [
"user_id"
],
"title": "get_user_permissionsArguments",
"type": "object"
}
}
},
{
"type": "function",
"function": {
"name": "check_file_access",
"description": "\n Check if a user can access a specific file. Requires a valid user ID.\n\n :param user_id: The user ID to check access for\n :param filename: The filename to check access to\n :return: Whether the user can access the file (yes/no)\n ",
"parameters": {
"properties": {
"user_id": {
"title": "User Id",
"type": "string"
},
"filename": {
"title": "Filename",
"type": "string"
}
},
"required": [
"user_id",
"filename"
],
"title": "check_file_accessArguments",
"type": "object"
}
}
},
{
"type": "function",
"function": {
"name": "get_experiment_id",
"description": "\n Get the experiment ID for a given experiment name. This ID is needed to get results.\n\n :param experiment_name: The name of the experiment\n :return: The experiment ID\n ",
"parameters": {
"properties": {
"experiment_name": {
"title": "Experiment Name",
"type": "string"
}
},
"required": [
"experiment_name"
],
"title": "get_experiment_idArguments",
"type": "object"
}
}
},
{
"type": "function",
"function": {
"name": "get_experiment_results",
"description": "\n Get the results for an experiment ID. Requires a valid experiment ID from get_experiment_id.\n\n :param experiment_id: The experiment ID to get results for\n :return: The experiment results\n ",
"parameters": {
"properties": {
"experiment_id": {
"title": "Experiment Id",
"type": "string"
}
},
"required": [
"experiment_id"
],
"title": "get_experiment_resultsArguments",
"type": "object"
}
}
}
]
},
"endpoint": "/v1/chat/completions",
"model": "gpt-4o"
},
"response": {
"body": [
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b218af7fa066",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": "assistant",
"tool_calls": null
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_c98e05ca17",
"usage": null,
"obfuscation": "N5OTLR9CfmU"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b218af7fa066",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": "call_z8P1RQv54BLxyMlRdMFkcCGd",
"function": {
"arguments": "",
"name": "get_experiment_id"
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_c98e05ca17",
"usage": null,
"obfuscation": "3EKK"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b218af7fa066",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "{\"ex",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_c98e05ca17",
"usage": null,
"obfuscation": "R"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b218af7fa066",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "perim",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_c98e05ca17",
"usage": null,
"obfuscation": "Q"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b218af7fa066",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "ent_na",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_c98e05ca17",
"usage": null,
"obfuscation": ""
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b218af7fa066",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "me\":",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_c98e05ca17",
"usage": null,
"obfuscation": "6"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b218af7fa066",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": " \"boi",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_c98e05ca17",
"usage": null,
"obfuscation": ""
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b218af7fa066",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "ling_p",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_c98e05ca17",
"usage": null,
"obfuscation": ""
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b218af7fa066",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "oint",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_c98e05ca17",
"usage": null,
"obfuscation": "pw"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b218af7fa066",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 0,
"id": null,
"function": {
"arguments": "\"}",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_c98e05ca17",
"usage": null,
"obfuscation": "Gfk"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b218af7fa066",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 1,
"id": "call_I5tcLgyMADoVwLKDj9HkTCs5",
"function": {
"arguments": "",
"name": "get_user_id"
},
"type": "function"
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_c98e05ca17",
"usage": null,
"obfuscation": "Yp7IueDs5V"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b218af7fa066",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 1,
"id": null,
"function": {
"arguments": "{\"us",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_c98e05ca17",
"usage": null,
"obfuscation": "8"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b218af7fa066",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 1,
"id": null,
"function": {
"arguments": "ernam",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_c98e05ca17",
"usage": null,
"obfuscation": "X"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b218af7fa066",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 1,
"id": null,
"function": {
"arguments": "e\": \"c",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_c98e05ca17",
"usage": null,
"obfuscation": "2oif8BwVnTCnAF"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b218af7fa066",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 1,
"id": null,
"function": {
"arguments": "harl",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_c98e05ca17",
"usage": null,
"obfuscation": "hv"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b218af7fa066",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": [
{
"index": 1,
"id": null,
"function": {
"arguments": "ie\"}",
"name": null
},
"type": null
}
]
},
"finish_reason": null,
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_c98e05ca17",
"usage": null,
"obfuscation": "C"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b218af7fa066",
"choices": [
{
"delta": {
"content": null,
"function_call": null,
"refusal": null,
"role": null,
"tool_calls": null
},
"finish_reason": "tool_calls",
"index": 0,
"logprobs": null
}
],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_c98e05ca17",
"usage": null,
"obfuscation": "ctjO"
}
},
{
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
"__data__": {
"id": "rec-b218af7fa066",
"choices": [],
"created": 0,
"model": "gpt-4o-2024-08-06",
"object": "chat.completion.chunk",
"service_tier": "default",
"system_fingerprint": "fp_c98e05ca17",
"usage": {
"completion_tokens": 51,
"prompt_tokens": 393,
"total_tokens": 444,
"completion_tokens_details": {
"accepted_prediction_tokens": 0,
"audio_tokens": 0,
"reasoning_tokens": 0,
"rejected_prediction_tokens": 0
},
"prompt_tokens_details": {
"audio_tokens": 0,
"cached_tokens": 0
}
},
"obfuscation": "fclbZeBSSKN4C"
}
}
],
"is_streaming": true
},
"id_normalization_mapping": {}
}

Some files were not shown because too many files have changed in this diff