Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-03 01:48:05 +00:00)

Commit 3f5576b7d6: Merge branch 'main' into custom-collection-name-vectordb
117 changed files with 16294 additions and 769 deletions
.github/CODEOWNERS (vendored, 2 changes)

@@ -2,4 +2,4 @@
 # These owners will be the default owners for everything in
 # the repo. Unless a later match takes precedence,
-* @ashwinb @yanxi0830 @hardikjshah @raghotham @ehhuang @leseb @bbrowning @reluctantfuturist @mattf @slekkala1 @franciscojavierarceo
+* @ashwinb @raghotham @ehhuang @leseb @bbrowning @mattf @franciscojavierarceo
.github/actions/setup-typescript-client/action.yml (vendored, new file, 35 additions)

@@ -0,0 +1,35 @@
+name: Setup TypeScript client
+description: Conditionally checkout and link llama-stack-client-typescript based on client-version
+inputs:
+  client-version:
+    description: 'Client version (latest or published)'
+    required: true
+
+outputs:
+  ts-client-path:
+    description: 'Path or version to use for TypeScript client'
+    value: ${{ steps.set-path.outputs.ts-client-path }}
+
+runs:
+  using: "composite"
+  steps:
+    - name: Checkout TypeScript client (latest)
+      if: ${{ inputs.client-version == 'latest' }}
+      uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
+      with:
+        repository: llamastack/llama-stack-client-typescript
+        ref: main
+        path: .ts-client-checkout
+
+    - name: Set TS_CLIENT_PATH
+      id: set-path
+      shell: bash
+      run: |
+        if [ "${{ inputs.client-version }}" = "latest" ]; then
+          echo "ts-client-path=${{ github.workspace }}/.ts-client-checkout" >> $GITHUB_OUTPUT
+        elif [ "${{ inputs.client-version }}" = "published" ]; then
+          echo "ts-client-path=^0.3.2" >> $GITHUB_OUTPUT
+        else
+          echo "::error::Invalid client-version: ${{ inputs.client-version }}"
+          exit 1
+        fi
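For orientation, a minimal sketch of how a job can consume this composite action (the step names and the final echo are illustrative assumptions; the real consumer is integration-tests.yml below):

```yaml
# Hypothetical caller; the real wiring lives in .github/workflows/integration-tests.yml.
steps:
  - name: Setup TypeScript client
    id: setup-ts-client
    uses: ./.github/actions/setup-typescript-client
    with:
      client-version: latest   # 'latest' checks out main; 'published' resolves to ^0.3.2; anything else fails
  - name: Show resolved client path
    run: echo "TS_CLIENT_PATH=${{ steps.setup-ts-client.outputs.ts-client-path }}"
```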
.github/workflows/integration-tests.yml (vendored, 16 changes)

@@ -93,11 +93,27 @@ jobs:
           suite: ${{ matrix.config.suite }}
           inference-mode: 'replay'

+      - name: Setup Node.js for TypeScript client tests
+        if: ${{ matrix.client == 'server' }}
+        uses: actions/setup-node@39370e3970a6d050c480ffad4ff0ed4d3fdee5af # v4.1.0
+        with:
+          node-version: '20'
+          cache: 'npm'
+          cache-dependency-path: tests/integration/client-typescript/package-lock.json
+
+      - name: Setup TypeScript client
+        if: ${{ matrix.client == 'server' }}
+        id: setup-ts-client
+        uses: ./.github/actions/setup-typescript-client
+        with:
+          client-version: ${{ matrix.client-version }}
+
       - name: Run tests
         if: ${{ matrix.config.allowed_clients == null || contains(matrix.config.allowed_clients, matrix.client) }}
         uses: ./.github/actions/run-and-record-tests
         env:
           OPENAI_API_KEY: dummy
+          TS_CLIENT_PATH: ${{ steps.setup-ts-client.outputs.ts-client-path || '' }}
         with:
           stack-config: >-
             ${{ matrix.config.stack_config
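These steps key off `matrix.client` and `matrix.client-version`, which are defined elsewhere in the workflow; a plausible shape is sketched below (only 'server', 'latest', and 'published' are attested by this diff — 'library' is an assumption):

```yaml
# Assumed matrix shape for the job; see caveats above.
strategy:
  matrix:
    client: [library, server]
    client-version: [latest, published]
```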
.github/workflows/stainless-builds.yml (vendored, 48 changes)

@@ -43,7 +43,41 @@ env:
   # Stainless organization dashboard

 jobs:
+  compute-branch:
+    runs-on: ubuntu-latest
+    outputs:
+      preview_branch: ${{ steps.compute.outputs.preview_branch }}
+      base_branch: ${{ steps.compute.outputs.base_branch }}
+      merge_branch: ${{ steps.compute.outputs.merge_branch }}
+    steps:
+      - name: Compute branch names
+        id: compute
+        run: |
+          HEAD_REPO="${{ github.event.pull_request.head.repo.full_name }}"
+          BASE_REPO="${{ github.repository }}"
+          BRANCH_NAME="${{ github.event.pull_request.head.ref }}"
+          FORK_OWNER="${{ github.event.pull_request.head.repo.owner.login }}"
+
+          if [ "$HEAD_REPO" != "$BASE_REPO" ]; then
+            # Fork PR: prefix with fork owner for isolation
+            if [ -z "$FORK_OWNER" ]; then
+              echo "Error: Fork PR detected but fork owner is empty" >&2
+              exit 1
+            fi
+            PREVIEW_BRANCH="preview/${FORK_OWNER}/${BRANCH_NAME}"
+            BASE_BRANCH="preview/base/${FORK_OWNER}/${BRANCH_NAME}"
+          else
+            # Same-repo PR
+            PREVIEW_BRANCH="preview/${BRANCH_NAME}"
+            BASE_BRANCH="preview/base/${BRANCH_NAME}"
+          fi
+
+          echo "preview_branch=${PREVIEW_BRANCH}" >> $GITHUB_OUTPUT
+          echo "base_branch=${BASE_BRANCH}" >> $GITHUB_OUTPUT
+          echo "merge_branch=${PREVIEW_BRANCH}" >> $GITHUB_OUTPUT
+
   preview:
+    needs: compute-branch
     if: github.event.action != 'closed'
     runs-on: ubuntu-latest
     permissions:

@@ -59,8 +93,6 @@ jobs:
         ref: ${{ github.event.pull_request.head.sha }}
         fetch-depth: 2

     # This action builds preview SDKs from the OpenAPI spec changes and
     # posts/updates a comment on the PR with build results and links to the preview.
     - name: Run preview builds
       uses: stainless-api/upload-openapi-spec-action/preview@32823b096b4319c53ee948d702d9052873af485f # 1.6.0
       with:

@@ -73,8 +105,11 @@ jobs:
         base_sha: ${{ github.event.pull_request.base.sha }}
         base_ref: ${{ github.event.pull_request.base.ref }}
         head_sha: ${{ github.event.pull_request.head.sha }}
+        branch: ${{ needs.compute-branch.outputs.preview_branch }}
+        base_branch: ${{ needs.compute-branch.outputs.base_branch }}

   merge:
+    needs: compute-branch
     if: github.event.action == 'closed' && github.event.pull_request.merged == true
     runs-on: ubuntu-latest
     permissions:

@@ -91,11 +126,11 @@ jobs:
         fetch-depth: 2

     # Note that this only merges in changes that happened on the last build on
-    # preview/${{ github.head_ref }}. It's possible that there are OAS/config
-    # changes that haven't been built, if the preview-sdk job didn't finish
+    # the computed preview branch. It's possible that there are OAS/config
+    # changes that haven't been built, if the preview job didn't finish
     # before this step starts. In theory we want to wait for all builds
-    # against preview/${{ github.head_ref }} to complete, but assuming that
-    # the preview-sdk job happens before the PR merge, it should be fine.
+    # against the preview branch to complete, but assuming that
+    # the preview job happens before the PR merge, it should be fine.
     - name: Run merge build
       uses: stainless-api/upload-openapi-spec-action/merge@32823b096b4319c53ee948d702d9052873af485f # 1.6.0
       with:

@@ -108,3 +143,4 @@ jobs:
         base_sha: ${{ github.event.pull_request.base.sha }}
         base_ref: ${{ github.event.pull_request.base.ref }}
         head_sha: ${{ github.event.pull_request.head.sha }}
+        merge_branch: ${{ needs.compute-branch.outputs.merge_branch }}
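Concretely, compute-branch maps PR metadata onto Stainless preview-branch names; with a hypothetical branch 'my-feature' (and fork owner 'alice' in the fork case), the outputs come out as:

```yaml
# Same-repo PR on branch "my-feature":
preview_branch: preview/my-feature
base_branch: preview/base/my-feature
merge_branch: preview/my-feature
# Fork PR from alice's fork, same branch:
# preview_branch: preview/alice/my-feature
# base_branch: preview/base/alice/my-feature
# merge_branch: preview/alice/my-feature
```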
.gitignore (vendored, 2 changes)

@@ -35,3 +35,5 @@ docs/static/imported-files/
 docs/docs/api-deprecated/
 docs/docs/api-experimental/
 docs/docs/api/
+tests/integration/client-typescript/node_modules/
+.ts-client-checkout/
@@ -24,7 +24,7 @@ https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `api_base` | `HttpUrl` | No | | Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com) |
+| `base_url` | `HttpUrl \| None` | No | | Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com/openai/v1) |
 | `api_version` | `str \| None` | No | | Azure API version for Azure (e.g., 2024-12-01-preview) |
 | `api_type` | `str \| None` | No | azure | Azure API type for Azure (e.g., azure) |

@@ -32,7 +32,7 @@ https://learn.microsoft.com/en-us/azure/ai-foundry/openai/overview

 ```yaml
 api_key: ${env.AZURE_API_KEY:=}
-api_base: ${env.AZURE_API_BASE:=}
+base_url: ${env.AZURE_API_BASE:=}
 api_version: ${env.AZURE_API_VERSION:=}
 api_type: ${env.AZURE_API_TYPE:=}
 ```
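Note the path change that rides along with the rename: the Azure base URL is now expected to include the `/openai/v1` suffix. A hypothetical environment value under the new convention ('my-resource' stands in for a real Azure OpenAI resource name):

```yaml
# Hypothetical value for the AZURE_API_BASE environment variable.
AZURE_API_BASE: https://my-resource.openai.azure.com/openai/v1
```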
@@ -17,11 +17,11 @@ Cerebras inference provider for running models on Cerebras Cloud platform.
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `base_url` | `str` | No | https://api.cerebras.ai | Base URL for the Cerebras API |
+| `base_url` | `HttpUrl \| None` | No | https://api.cerebras.ai/v1 | Base URL for the Cerebras API |

 ## Sample Configuration

 ```yaml
-base_url: https://api.cerebras.ai
+base_url: https://api.cerebras.ai/v1
 api_key: ${env.CEREBRAS_API_KEY:=}
 ```

@@ -17,11 +17,11 @@ Databricks inference provider for running models on Databricks' unified analytic
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_token` | `SecretStr \| None` | No | | The Databricks API token |
-| `url` | `str \| None` | No | | The URL for the Databricks model serving endpoint |
+| `base_url` | `HttpUrl \| None` | No | | The URL for the Databricks model serving endpoint (should include /serving-endpoints path) |

 ## Sample Configuration

 ```yaml
-url: ${env.DATABRICKS_HOST:=}
+base_url: ${env.DATABRICKS_HOST:=}
 api_token: ${env.DATABRICKS_TOKEN:=}
 ```

@@ -17,11 +17,11 @@ Fireworks AI inference provider for Llama models and other AI models on the Fire
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `url` | `str` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks server |
+| `base_url` | `HttpUrl \| None` | No | https://api.fireworks.ai/inference/v1 | The URL for the Fireworks server |

 ## Sample Configuration

 ```yaml
-url: https://api.fireworks.ai/inference/v1
+base_url: https://api.fireworks.ai/inference/v1
 api_key: ${env.FIREWORKS_API_KEY:=}
 ```

@@ -17,11 +17,11 @@ Groq inference provider for ultra-fast inference using Groq's LPU technology.
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `url` | `str` | No | https://api.groq.com | The URL for the Groq AI server |
+| `base_url` | `HttpUrl \| None` | No | https://api.groq.com/openai/v1 | The URL for the Groq AI server |

 ## Sample Configuration

 ```yaml
-url: https://api.groq.com
+base_url: https://api.groq.com/openai/v1
 api_key: ${env.GROQ_API_KEY:=}
 ```

@@ -17,11 +17,11 @@ Llama OpenAI-compatible provider for using Llama models with OpenAI API format.
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `openai_compat_api_base` | `str` | No | https://api.llama.com/compat/v1/ | The URL for the Llama API server |
+| `base_url` | `HttpUrl \| None` | No | https://api.llama.com/compat/v1/ | The URL for the Llama API server |

 ## Sample Configuration

 ```yaml
-openai_compat_api_base: https://api.llama.com/compat/v1/
+base_url: https://api.llama.com/compat/v1/
 api_key: ${env.LLAMA_API_KEY}
 ```

@@ -17,15 +17,13 @@ NVIDIA inference provider for accessing NVIDIA NIM models and AI services.
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `url` | `str` | No | https://integrate.api.nvidia.com | A base url for accessing the NVIDIA NIM |
+| `base_url` | `HttpUrl \| None` | No | https://integrate.api.nvidia.com/v1 | A base url for accessing the NVIDIA NIM |
 | `timeout` | `int` | No | 60 | Timeout for the HTTP requests |
-| `append_api_version` | `bool` | No | True | When set to false, the API version will not be appended to the base_url. By default, it is true. |
 | `rerank_model_to_url` | `dict[str, str]` | No | `{'nv-rerank-qa-mistral-4b:1': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking', 'nvidia/nv-rerankqa-mistral-4b-v3': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking', 'nvidia/llama-3.2-nv-rerankqa-1b-v2': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking'}` | Mapping of rerank model identifiers to their API endpoints. |

 ## Sample Configuration

 ```yaml
-url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
 api_key: ${env.NVIDIA_API_KEY:=}
-append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
 ```

@@ -16,10 +16,10 @@ Ollama inference provider for running local models through the Ollama runtime.
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
-| `url` | `str` | No | http://localhost:11434 | |
+| `base_url` | `HttpUrl \| None` | No | http://localhost:11434/v1 | |

 ## Sample Configuration

 ```yaml
-url: ${env.OLLAMA_URL:=http://localhost:11434}
+base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
 ```

@@ -17,7 +17,7 @@ OpenAI inference provider for accessing GPT models and other OpenAI services.
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `base_url` | `str` | No | https://api.openai.com/v1 | Base URL for OpenAI API |
+| `base_url` | `HttpUrl \| None` | No | https://api.openai.com/v1 | Base URL for OpenAI API |

 ## Sample Configuration

@@ -17,11 +17,11 @@ Passthrough inference provider for connecting to any external inference service
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `url` | `str` | No | | The URL for the passthrough endpoint |
+| `base_url` | `HttpUrl \| None` | No | | The URL for the passthrough endpoint |

 ## Sample Configuration

 ```yaml
-url: ${env.PASSTHROUGH_URL}
+base_url: ${env.PASSTHROUGH_URL}
 api_key: ${env.PASSTHROUGH_API_KEY}
 ```

@@ -17,11 +17,11 @@ RunPod inference provider for running models on RunPod's cloud GPU platform.
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_token` | `SecretStr \| None` | No | | The API token |
-| `url` | `str \| None` | No | | The URL for the Runpod model serving endpoint |
+| `base_url` | `HttpUrl \| None` | No | | The URL for the Runpod model serving endpoint |

 ## Sample Configuration

 ```yaml
-url: ${env.RUNPOD_URL:=}
+base_url: ${env.RUNPOD_URL:=}
 api_token: ${env.RUNPOD_API_TOKEN}
 ```

@@ -17,11 +17,11 @@ SambaNova inference provider for running models on SambaNova's dataflow architec
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `url` | `str` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server |
+| `base_url` | `HttpUrl \| None` | No | https://api.sambanova.ai/v1 | The URL for the SambaNova AI server |

 ## Sample Configuration

 ```yaml
-url: https://api.sambanova.ai/v1
+base_url: https://api.sambanova.ai/v1
 api_key: ${env.SAMBANOVA_API_KEY:=}
 ```

@@ -16,10 +16,10 @@ Text Generation Inference (TGI) provider for HuggingFace model serving.
 |-------|------|----------|---------|-------------|
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
-| `url` | `str` | No | | The URL for the TGI serving endpoint |
+| `base_url` | `HttpUrl \| None` | No | | The URL for the TGI serving endpoint (should include /v1 path) |

 ## Sample Configuration

 ```yaml
-url: ${env.TGI_URL:=}
+base_url: ${env.TGI_URL:=}
 ```

@@ -17,11 +17,11 @@ Together AI inference provider for open-source models and collaborative AI devel
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `url` | `str` | No | https://api.together.xyz/v1 | The URL for the Together AI server |
+| `base_url` | `HttpUrl \| None` | No | https://api.together.xyz/v1 | The URL for the Together AI server |

 ## Sample Configuration

 ```yaml
-url: https://api.together.xyz/v1
+base_url: https://api.together.xyz/v1
 api_key: ${env.TOGETHER_API_KEY:=}
 ```

@@ -17,14 +17,14 @@ Remote vLLM inference provider for connecting to vLLM servers.
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_token` | `SecretStr \| None` | No | | The API token |
-| `url` | `str \| None` | No | | The URL for the vLLM model serving endpoint |
+| `base_url` | `HttpUrl \| None` | No | | The URL for the vLLM model serving endpoint |
 | `max_tokens` | `int` | No | 4096 | Maximum number of tokens to generate. |
 | `tls_verify` | `bool \| str` | No | True | Whether to verify TLS certificates. Can be a boolean or a path to a CA certificate file. |

 ## Sample Configuration

 ```yaml
-url: ${env.VLLM_URL:=}
+base_url: ${env.VLLM_URL:=}
 max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
 api_token: ${env.VLLM_API_TOKEN:=fake}
 tls_verify: ${env.VLLM_TLS_VERIFY:=true}

@@ -17,14 +17,14 @@ IBM WatsonX inference provider for accessing AI models on IBM's WatsonX platform
 | `allowed_models` | `list[str] \| None` | No | | List of models that should be registered with the model registry. If None, all models are allowed. |
 | `refresh_models` | `bool` | No | False | Whether to refresh models periodically from the provider |
 | `api_key` | `SecretStr \| None` | No | | Authentication credential for the provider |
-| `url` | `str` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing the watsonx.ai |
+| `base_url` | `HttpUrl \| None` | No | https://us-south.ml.cloud.ibm.com | A base url for accessing the watsonx.ai |
 | `project_id` | `str \| None` | No | | The watsonx.ai project ID |
 | `timeout` | `int` | No | 60 | Timeout for the HTTP requests |

 ## Sample Configuration

 ```yaml
-url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
+base_url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
 api_key: ${env.WATSONX_API_KEY:=}
 project_id: ${env.WATSONX_PROJECT_ID:=}
 ```
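Across all of these provider docs the change is one consistent rename: provider-specific URL keys (`url`, `api_base`, `openai_compat_api_base`) collapse into a single validated `base_url` of type `HttpUrl | None`, and defaults now carry the API path suffix. Before/after for one provider, using values from the Groq doc above:

```yaml
# before
url: https://api.groq.com
# after: key renamed, OpenAI-compatible path baked into the default
base_url: https://api.groq.com/openai/v1
```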
docs/package-lock.json (generated, 122 changes)

@@ -10712,12 +10712,6 @@
       "integrity": "sha512-QMUezzXWII9EV5aTFXW1UBVUO77wYPpjqIF8/AviUCThNeSYZykpoTixUeaNNBwmCev0AMDWMAni+f8Hxb1IFw==",
       "license": "Unlicense"
     },
-    "node_modules/fs.realpath": {
-      "version": "1.0.0",
-      "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz",
-      "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==",
-      "license": "ISC"
-    },
     "node_modules/fsevents": {
       "version": "2.3.3",
       "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz",

@@ -10821,21 +10815,20 @@
       "license": "ISC"
     },
     "node_modules/glob": {
-      "version": "7.2.3",
-      "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz",
-      "integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==",
-      "deprecated": "Glob versions prior to v9 are no longer supported",
+      "version": "10.5.0",
+      "resolved": "https://registry.npmjs.org/glob/-/glob-10.5.0.tgz",
+      "integrity": "sha512-DfXN8DfhJ7NH3Oe7cFmu3NCu1wKbkReJ8TorzSAFbSKrlNaQSKfIzqYqVY8zlbs2NLBbWpRiU52GX2PbaBVNkg==",
      "license": "ISC",
      "dependencies": {
-        "fs.realpath": "^1.0.0",
-        "inflight": "^1.0.4",
-        "inherits": "2",
-        "minimatch": "^3.1.1",
-        "once": "^1.3.0",
-        "path-is-absolute": "^1.0.0"
+        "foreground-child": "^3.1.0",
+        "jackspeak": "^3.1.2",
+        "minimatch": "^9.0.4",
+        "minipass": "^7.1.2",
+        "package-json-from-dist": "^1.0.0",
+        "path-scurry": "^1.11.1"
      },
-      "engines": {
-        "node": "*"
+      "bin": {
+        "glob": "dist/esm/bin.mjs"
      },
      "funding": {
        "url": "https://github.com/sponsors/isaacs"

@@ -10859,26 +10852,19 @@
       "integrity": "sha512-lkX1HJXwyMcprw/5YUZc2s7DrpAiHB21/V+E1rHUrVNokkvB6bqMzT0VfV6/86ZNabt1k14YOIaT7nDvOX3Iiw==",
       "license": "BSD-2-Clause"
     },
-    "node_modules/glob/node_modules/brace-expansion": {
-      "version": "1.1.12",
-      "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.12.tgz",
-      "integrity": "sha512-9T9UjW3r0UW5c1Q7GTwllptXwhvYmEzFhzMfZ9H7FQWt+uZePjZPjBP/W1ZEyZ1twGWom5/56TF4lPcqjnDHcg==",
-      "license": "MIT",
-      "dependencies": {
-        "balanced-match": "^1.0.0",
-        "concat-map": "0.0.1"
-      }
-    },
     "node_modules/glob/node_modules/minimatch": {
-      "version": "3.1.2",
-      "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz",
-      "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==",
+      "version": "9.0.5",
+      "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz",
+      "integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==",
      "license": "ISC",
      "dependencies": {
-        "brace-expansion": "^1.1.7"
+        "brace-expansion": "^2.0.1"
      },
      "engines": {
-        "node": "*"
+        "node": ">=16 || 14 >=14.17"
+      },
+      "funding": {
+        "url": "https://github.com/sponsors/isaacs"
      }
    },
    "node_modules/global-dirs": {

@@ -11792,17 +11778,6 @@
       "node": ">=12"
     }
   },
-  "node_modules/inflight": {
-    "version": "1.0.6",
-    "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz",
-    "integrity": "sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==",
-    "deprecated": "This module is not supported, and leaks memory. Do not use it. Check out lru-cache if you want a good and tested way to coalesce async requests by a key value, which is much more comprehensive and powerful.",
-    "license": "ISC",
-    "dependencies": {
-      "once": "^1.3.0",
-      "wrappy": "1"
-    }
-  },
   "node_modules/inherits": {
     "version": "2.0.4",
     "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz",

@@ -15570,15 +15545,6 @@
       "node": ">= 0.8"
     }
   },
-  "node_modules/once": {
-    "version": "1.4.0",
-    "resolved": "https://registry.npmjs.org/once/-/once-1.4.0.tgz",
-    "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==",
-    "license": "ISC",
-    "dependencies": {
-      "wrappy": "1"
-    }
-  },
   "node_modules/onetime": {
     "version": "5.1.2",
     "resolved": "https://registry.npmjs.org/onetime/-/onetime-5.1.2.tgz",

@@ -15955,15 +15921,6 @@
       "node": "^12.20.0 || ^14.13.1 || >=16.0.0"
     }
   },
-  "node_modules/path-is-absolute": {
-    "version": "1.0.1",
-    "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz",
-    "integrity": "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==",
-    "license": "MIT",
-    "engines": {
-      "node": ">=0.10.0"
-    }
-  },
   "node_modules/path-is-inside": {
     "version": "1.0.2",
     "resolved": "https://registry.npmjs.org/path-is-inside/-/path-is-inside-1.0.2.tgz",

@@ -20038,41 +19995,6 @@
       "node": ">= 6"
     }
   },
-  "node_modules/sucrase/node_modules/glob": {
-    "version": "10.4.5",
-    "resolved": "https://registry.npmjs.org/glob/-/glob-10.4.5.tgz",
-    "integrity": "sha512-7Bv8RF0k6xjo7d4A/PxYLbUCfb6c+Vpd2/mB2yRDlew7Jb5hEXiCD9ibfO7wpk8i4sevK6DFny9h7EYbM3/sHg==",
-    "license": "ISC",
-    "dependencies": {
-      "foreground-child": "^3.1.0",
-      "jackspeak": "^3.1.2",
-      "minimatch": "^9.0.4",
-      "minipass": "^7.1.2",
-      "package-json-from-dist": "^1.0.0",
-      "path-scurry": "^1.11.1"
-    },
-    "bin": {
-      "glob": "dist/esm/bin.mjs"
-    },
-    "funding": {
-      "url": "https://github.com/sponsors/isaacs"
-    }
-  },
-  "node_modules/sucrase/node_modules/minimatch": {
-    "version": "9.0.5",
-    "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz",
-    "integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==",
-    "license": "ISC",
-    "dependencies": {
-      "brace-expansion": "^2.0.1"
-    },
-    "engines": {
-      "node": ">=16 || 14 >=14.17"
-    },
-    "funding": {
-      "url": "https://github.com/sponsors/isaacs"
-    }
-  },
   "node_modules/supports-color": {
     "version": "7.2.0",
     "resolved": "https://registry.npmjs.org/supports-color/-/supports-color-7.2.0.tgz",

@@ -21620,12 +21542,6 @@
       "url": "https://github.com/chalk/strip-ansi?sponsor=1"
     }
   },
-  "node_modules/wrappy": {
-    "version": "1.0.2",
-    "resolved": "https://registry.npmjs.org/wrappy/-/wrappy-1.0.2.tgz",
-    "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==",
-    "license": "ISC"
-  },
   "node_modules/write-file-atomic": {
     "version": "3.0.3",
     "resolved": "https://registry.npmjs.org/write-file-atomic/-/write-file-atomic-3.0.3.tgz",
@@ -31,6 +31,9 @@
     "react-dom": "^19.0.0",
     "remark-code-import": "^1.2.0"
   },
+  "overrides": {
+    "glob": "^10.5.0"
+  },
   "browserslist": {
     "production": [
       ">0.5%",
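This `overrides` entry is what drives the package-lock.json churn above: npm overrides force every transitive resolution of a package into the stated range, so pinning `glob` to `^10.5.0` evicts the deprecated v7 line along with its `inflight`, `once`, `wrappy`, and `path-is-absolute` chain. The effective constraint, as an excerpt (JSON is a YAML subset):

```yaml
# Any transitive "glob" in docs/ now resolves within this range;
# after install, `npm ls glob` should report only 10.x copies.
{ "overrides": { "glob": "^10.5.0" } }
```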
docs/static/deprecated-llama-stack-spec.yaml (vendored, 14 changes)

@@ -6705,9 +6705,21 @@ components:
           title: Object
           default: vector_store.file
         attributes:
-          additionalProperties: true
+          additionalProperties:
+            anyOf:
+            - type: string
+              maxLength: 512
+            - type: number
+            - type: boolean
+            title: string | number | boolean
+          propertyNames:
+            type: string
+            maxLength: 64
           type: object
+          maxProperties: 16
           title: Attributes
+          description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers.
+          x-oaiTypeLabel: map
         chunking_strategy:
           oneOf:
           - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
docs/static/experimental-llama-stack-spec.yaml (vendored, 14 changes)

@@ -6061,9 +6061,21 @@ components:
           title: Object
           default: vector_store.file
         attributes:
-          additionalProperties: true
+          additionalProperties:
+            anyOf:
+            - type: string
+              maxLength: 512
+            - type: number
+            - type: boolean
+            title: string | number | boolean
+          propertyNames:
+            type: string
+            maxLength: 64
           type: object
+          maxProperties: 16
           title: Attributes
+          description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers.
+          x-oaiTypeLabel: map
         chunking_strategy:
           oneOf:
           - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
docs/static/llama-stack-spec.yaml (vendored, 14 changes)

@@ -8883,9 +8883,21 @@ components:
           title: Object
           default: vector_store.file
         attributes:
-          additionalProperties: true
+          additionalProperties:
+            anyOf:
+            - type: string
+              maxLength: 512
+            - type: number
+            - type: boolean
+            title: string | number | boolean
+          propertyNames:
+            type: string
+            maxLength: 64
           type: object
+          maxProperties: 16
           title: Attributes
+          description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers.
+          x-oaiTypeLabel: map
         chunking_strategy:
           oneOf:
           - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
docs/static/stainless-llama-stack-spec.yaml (vendored, 14 changes)

@@ -9862,9 +9862,21 @@ components:
           title: Object
           default: vector_store.file
         attributes:
-          additionalProperties: true
+          additionalProperties:
+            anyOf:
+            - type: string
+              maxLength: 512
+            - type: number
+            - type: boolean
+            title: string | number | boolean
+          propertyNames:
+            type: string
+            maxLength: 64
           type: object
+          maxProperties: 16
           title: Attributes
+          description: Set of 16 key-value pairs that can be attached to an object. This can be useful for storing additional information about the object in a structured format, and querying for objects via API or the dashboard. Keys are strings with a maximum length of 64 characters. Values are strings with a maximum length of 512 characters, booleans, or numbers.
+          x-oaiTypeLabel: map
         chunking_strategy:
           oneOf:
           - $ref: '#/components/schemas/VectorStoreChunkingStrategyAuto'
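The schema change replaces the free-form `additionalProperties: true` with OpenAI's bounded metadata shape. A minimal example of an `attributes` value that satisfies the new constraints:

```yaml
# At most 16 keys; each key is at most 64 characters; values are
# strings (at most 512 characters), numbers, or booleans.
attributes:
  category: reference
  priority: 3
  internal: false
```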
@@ -287,9 +287,9 @@ start_container() {
     # On macOS/Windows, use host.docker.internal to reach host from container
     # On Linux with --network host, use localhost
     if [[ "$(uname)" == "Darwin" ]] || [[ "$(uname)" == *"MINGW"* ]]; then
-        OLLAMA_URL="${OLLAMA_URL:-http://host.docker.internal:11434}"
+        OLLAMA_URL="${OLLAMA_URL:-http://host.docker.internal:11434/v1}"
     else
-        OLLAMA_URL="${OLLAMA_URL:-http://localhost:11434}"
+        OLLAMA_URL="${OLLAMA_URL:-http://localhost:11434/v1}"
     fi
     DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OLLAMA_URL=$OLLAMA_URL"
@@ -16,16 +16,16 @@ import sys
 from tests.integration.suites import SETUP_DEFINITIONS, SUITE_DEFINITIONS


-def get_setup_env_vars(setup_name, suite_name=None):
+def get_setup_config(setup_name, suite_name=None):
     """
-    Get environment variables for a setup, with optional suite default fallback.
+    Get full configuration (env vars + defaults) for a setup.

     Args:
         setup_name: Name of the setup (e.g., 'ollama', 'gpt')
         suite_name: Optional suite name to get default setup if setup_name is None

     Returns:
-        Dictionary of environment variables
+        Dictionary with 'env' and 'defaults' keys
     """
     # If no setup specified, try to get default from suite
     if not setup_name and suite_name:

@@ -34,7 +34,7 @@ def get_setup_env_vars(setup_name, suite_name=None):
         setup_name = suite.default_setup

     if not setup_name:
-        return {}
+        return {"env": {}, "defaults": {}}

     setup = SETUP_DEFINITIONS.get(setup_name)
     if not setup:

@@ -44,27 +44,31 @@ def get_setup_env_vars(setup_name, suite_name=None):
         )
         sys.exit(1)

-    return setup.env
+    return {"env": setup.env, "defaults": setup.defaults}


 def main():
-    parser = argparse.ArgumentParser(description="Extract environment variables from a test setup")
+    parser = argparse.ArgumentParser(description="Extract environment variables and defaults from a test setup")
     parser.add_argument("--setup", help="Setup name (e.g., ollama, gpt)")
     parser.add_argument("--suite", help="Suite name to get default setup from if --setup not provided")
     parser.add_argument("--format", choices=["bash", "json"], default="bash", help="Output format (default: bash)")

     args = parser.parse_args()

-    env_vars = get_setup_env_vars(args.setup, args.suite)
+    config = get_setup_config(args.setup, args.suite)

     if args.format == "bash":
-        # Output as bash export statements
-        for key, value in env_vars.items():
+        # Output env vars as bash export statements
+        for key, value in config["env"].items():
             print(f"export {key}='{value}'")
+        # Output defaults as bash export statements with LLAMA_STACK_TEST_ prefix
+        for key, value in config["defaults"].items():
+            env_key = f"LLAMA_STACK_TEST_{key.upper()}"
+            print(f"export {env_key}='{value}'")
     elif args.format == "json":
         import json

-        print(json.dumps(env_vars))
+        print(json.dumps(config))


 if __name__ == "__main__":
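With `--format json` the script now emits both maps in a single object. A sketch of plausible output for a hypothetical 'ollama' setup (the `defaults` keys and values here are assumptions, not taken from this diff); in bash mode the same defaults would surface as `LLAMA_STACK_TEST_*` exports, e.g. `LLAMA_STACK_TEST_TEXT_MODEL`:

```yaml
# Hypothetical --format json output (JSON, which is a YAML subset):
{"env": {"OLLAMA_URL": "http://localhost:11434/v1"}, "defaults": {"text_model": "ollama/llama3.2:3b"}}
```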
@@ -640,7 +640,7 @@ cmd=( run -d "${PLATFORM_OPTS[@]}" --name llama-stack \
     --network llama-net \
     -p "${PORT}:${PORT}" \
     "${server_env_opts[@]}" \
-    -e OLLAMA_URL="http://ollama-server:${OLLAMA_PORT}" \
+    -e OLLAMA_URL="http://ollama-server:${OLLAMA_PORT}/v1" \
     "${SERVER_IMAGE}" --port "${PORT}")

 log "🦙 Starting Llama Stack..."
@@ -20,6 +20,7 @@ TEST_PATTERN=""
 INFERENCE_MODE="replay"
 EXTRA_PARAMS=""
 COLLECT_ONLY=false
+TYPESCRIPT_ONLY=false

 # Function to display usage
 usage() {

@@ -34,6 +35,7 @@ Options:
   --subdirs STRING          Comma-separated list of test subdirectories to run (overrides suite)
   --pattern STRING          Regex pattern to pass to pytest -k
   --collect-only            Collect tests only without running them (skips server startup)
+  --typescript-only         Skip Python tests and run only TypeScript client tests
   --help                    Show this help message

 Suites are defined in tests/integration/suites.py and define which tests to run.

@@ -90,6 +92,10 @@ while [[ $# -gt 0 ]]; do
             COLLECT_ONLY=true
             shift
             ;;
+        --typescript-only)
+            TYPESCRIPT_ONLY=true
+            shift
+            ;;
         --help)
             usage
             exit 0

@@ -181,6 +187,10 @@ echo "$SETUP_ENV"
 eval "$SETUP_ENV"
 echo ""

+# Export suite and setup names for TypeScript tests
+export LLAMA_STACK_TEST_SUITE="$TEST_SUITE"
+export LLAMA_STACK_TEST_SETUP="$TEST_SETUP"
+
 ROOT_DIR="$THIS_DIR/.."
 cd $ROOT_DIR

@@ -212,6 +222,71 @@ find_available_port() {
     return 1
 }

+run_client_ts_tests() {
+    if ! command -v npm &>/dev/null; then
+        echo "npm could not be found; ensure Node.js is installed"
+        return 1
+    fi
+
+    pushd tests/integration/client-typescript >/dev/null
+
+    # Determine if TS_CLIENT_PATH is a directory path or an npm version
+    if [[ -d "$TS_CLIENT_PATH" ]]; then
+        # It's a directory path - use local checkout
+        if [[ ! -f "$TS_CLIENT_PATH/package.json" ]]; then
+            echo "Error: $TS_CLIENT_PATH exists but doesn't look like llama-stack-client-typescript (no package.json)"
+            popd >/dev/null
+            return 1
+        fi
+        echo "Using local llama-stack-client-typescript from: $TS_CLIENT_PATH"
+
+        # Build the TypeScript client first
+        echo "Building TypeScript client..."
+        pushd "$TS_CLIENT_PATH" >/dev/null
+        npm install --silent
+        npm run build --silent
+        popd >/dev/null
+
+        # Install other dependencies first
+        if [[ "${CI:-}" == "true" || "${CI:-}" == "1" ]]; then
+            npm ci --silent
+        else
+            npm install --silent
+        fi
+
+        # Then install the client from local directory
+        echo "Installing llama-stack-client from: $TS_CLIENT_PATH"
+        npm install "$TS_CLIENT_PATH" --silent
+    else
+        # It's an npm version specifier - install from npm
+        echo "Installing llama-stack-client@${TS_CLIENT_PATH} from npm"
+        if [[ "${CI:-}" == "true" || "${CI:-}" == "1" ]]; then
+            npm ci --silent
+            npm install "llama-stack-client@${TS_CLIENT_PATH}" --silent
+        else
+            npm install "llama-stack-client@${TS_CLIENT_PATH}" --silent
+        fi
+    fi
+
+    # Verify installation
+    echo "Verifying llama-stack-client installation..."
+    if npm list llama-stack-client 2>/dev/null | grep -q llama-stack-client; then
+        echo "✅ llama-stack-client successfully installed"
+        npm list llama-stack-client
+    else
+        echo "❌ llama-stack-client not found in node_modules"
+        echo "Installed packages:"
+        npm list --depth=0
+        popd >/dev/null
+        return 1
+    fi
+
+    echo "Running TypeScript tests for suite $TEST_SUITE (setup $TEST_SETUP)"
+    npm test
+
+    popd >/dev/null
+}
+
 # Start Llama Stack Server if needed
 if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then
     # Find an available port for the server

@@ -221,6 +296,7 @@ if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then
         exit 1
     fi
     export LLAMA_STACK_PORT
+    export TEST_API_BASE_URL="http://localhost:$LLAMA_STACK_PORT"
     echo "Will use port: $LLAMA_STACK_PORT"

     stop_server() {

@@ -298,6 +374,7 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
         exit 1
     fi
     export LLAMA_STACK_PORT
+    export TEST_API_BASE_URL="http://localhost:$LLAMA_STACK_PORT"
     echo "Will use port: $LLAMA_STACK_PORT"

     echo "=== Building Docker Image for distribution: $DISTRO ==="

@@ -473,6 +550,8 @@ if [[ -n "$STACK_CONFIG" ]]; then
     STACK_CONFIG_ARG="--stack-config=$STACK_CONFIG"
 fi

+# Run Python tests unless typescript-only mode
+if [[ "$TYPESCRIPT_ONLY" == "false" ]]; then
 pytest -s -v $PYTEST_TARGET \
     $STACK_CONFIG_ARG \
     --inference-mode="$INFERENCE_MODE" \

@@ -483,6 +562,11 @@ pytest -s -v $PYTEST_TARGET \
     --color=yes $EXTRA_PARAMS \
     --capture=tee-sys
 exit_code=$?
+else
+    echo "Skipping Python tests (--typescript-only mode)"
+    exit_code=0
+fi

 set +x
 set -e

@@ -506,5 +590,10 @@ else
     exit 1
 fi

+# Run TypeScript client tests if TS_CLIENT_PATH is set
+if [[ $exit_code -eq 0 && -n "${TS_CLIENT_PATH:-}" && "${LLAMA_STACK_TEST_STACK_CONFIG_TYPE:-}" == "server" ]]; then
+    run_client_ts_tests
+fi
+
 echo ""
 echo "=== Integration Tests Complete ==="
@@ -17,32 +17,32 @@ providers:
   - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
     provider_type: remote::cerebras
     config:
-      base_url: https://api.cerebras.ai
+      base_url: https://api.cerebras.ai/v1
       api_key: ${env.CEREBRAS_API_KEY:=}
   - provider_id: ${env.OLLAMA_URL:+ollama}
     provider_type: remote::ollama
     config:
-      url: ${env.OLLAMA_URL:=http://localhost:11434}
+      base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
   - provider_id: ${env.VLLM_URL:+vllm}
     provider_type: remote::vllm
     config:
-      url: ${env.VLLM_URL:=}
+      base_url: ${env.VLLM_URL:=}
       max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
       api_token: ${env.VLLM_API_TOKEN:=fake}
       tls_verify: ${env.VLLM_TLS_VERIFY:=true}
   - provider_id: ${env.TGI_URL:+tgi}
     provider_type: remote::tgi
     config:
-      url: ${env.TGI_URL:=}
+      base_url: ${env.TGI_URL:=}
   - provider_id: fireworks
     provider_type: remote::fireworks
     config:
-      url: https://api.fireworks.ai/inference/v1
+      base_url: https://api.fireworks.ai/inference/v1
       api_key: ${env.FIREWORKS_API_KEY:=}
   - provider_id: together
     provider_type: remote::together
     config:
-      url: https://api.together.xyz/v1
+      base_url: https://api.together.xyz/v1
       api_key: ${env.TOGETHER_API_KEY:=}
   - provider_id: bedrock
     provider_type: remote::bedrock

@@ -52,9 +52,8 @@ providers:
   - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
     provider_type: remote::nvidia
     config:
-      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
       api_key: ${env.NVIDIA_API_KEY:=}
-      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
   - provider_id: openai
     provider_type: remote::openai
     config:

@@ -76,18 +75,18 @@ providers:
   - provider_id: groq
     provider_type: remote::groq
     config:
-      url: https://api.groq.com
+      base_url: https://api.groq.com/openai/v1
       api_key: ${env.GROQ_API_KEY:=}
   - provider_id: sambanova
     provider_type: remote::sambanova
     config:
-      url: https://api.sambanova.ai/v1
+      base_url: https://api.sambanova.ai/v1
       api_key: ${env.SAMBANOVA_API_KEY:=}
   - provider_id: ${env.AZURE_API_KEY:+azure}
     provider_type: remote::azure
     config:
       api_key: ${env.AZURE_API_KEY:=}
-      api_base: ${env.AZURE_API_BASE:=}
+      base_url: ${env.AZURE_API_BASE:=}
       api_version: ${env.AZURE_API_VERSION:=}
       api_type: ${env.AZURE_API_TYPE:=}
   - provider_id: sentence-transformers
@@ -17,32 +17,32 @@ providers:
   - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
     provider_type: remote::cerebras
     config:
-      base_url: https://api.cerebras.ai
+      base_url: https://api.cerebras.ai/v1
       api_key: ${env.CEREBRAS_API_KEY:=}
   - provider_id: ${env.OLLAMA_URL:+ollama}
     provider_type: remote::ollama
     config:
-      url: ${env.OLLAMA_URL:=http://localhost:11434}
+      base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
   - provider_id: ${env.VLLM_URL:+vllm}
     provider_type: remote::vllm
     config:
-      url: ${env.VLLM_URL:=}
+      base_url: ${env.VLLM_URL:=}
       max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
       api_token: ${env.VLLM_API_TOKEN:=fake}
       tls_verify: ${env.VLLM_TLS_VERIFY:=true}
   - provider_id: ${env.TGI_URL:+tgi}
     provider_type: remote::tgi
     config:
-      url: ${env.TGI_URL:=}
+      base_url: ${env.TGI_URL:=}
   - provider_id: fireworks
     provider_type: remote::fireworks
     config:
-      url: https://api.fireworks.ai/inference/v1
+      base_url: https://api.fireworks.ai/inference/v1
       api_key: ${env.FIREWORKS_API_KEY:=}
   - provider_id: together
     provider_type: remote::together
     config:
-      url: https://api.together.xyz/v1
+      base_url: https://api.together.xyz/v1
       api_key: ${env.TOGETHER_API_KEY:=}
   - provider_id: bedrock
     provider_type: remote::bedrock

@@ -52,9 +52,8 @@ providers:
   - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
     provider_type: remote::nvidia
    config:
-      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
       api_key: ${env.NVIDIA_API_KEY:=}
-      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
   - provider_id: openai
     provider_type: remote::openai
     config:

@@ -76,18 +75,18 @@ providers:
   - provider_id: groq
     provider_type: remote::groq
     config:
-      url: https://api.groq.com
+      base_url: https://api.groq.com/openai/v1
       api_key: ${env.GROQ_API_KEY:=}
   - provider_id: sambanova
     provider_type: remote::sambanova
     config:
-      url: https://api.sambanova.ai/v1
+      base_url: https://api.sambanova.ai/v1
       api_key: ${env.SAMBANOVA_API_KEY:=}
   - provider_id: ${env.AZURE_API_KEY:+azure}
     provider_type: remote::azure
     config:
       api_key: ${env.AZURE_API_KEY:=}
-      api_base: ${env.AZURE_API_BASE:=}
+      base_url: ${env.AZURE_API_BASE:=}
       api_version: ${env.AZURE_API_VERSION:=}
       api_type: ${env.AZURE_API_TYPE:=}
   - provider_id: sentence-transformers
@@ -16,9 +16,8 @@ providers:
   - provider_id: nvidia
     provider_type: remote::nvidia
     config:
-      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
       api_key: ${env.NVIDIA_API_KEY:=}
-      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
   - provider_id: nvidia
     provider_type: remote::nvidia
     config:
@@ -16,9 +16,8 @@ providers:
   - provider_id: nvidia
     provider_type: remote::nvidia
     config:
-      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
       api_key: ${env.NVIDIA_API_KEY:=}
-      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
   vector_io:
   - provider_id: faiss
     provider_type: inline::faiss
@@ -27,12 +27,12 @@ providers:
   - provider_id: groq
     provider_type: remote::groq
     config:
-      url: https://api.groq.com
+      base_url: https://api.groq.com/openai/v1
       api_key: ${env.GROQ_API_KEY:=}
   - provider_id: together
     provider_type: remote::together
     config:
-      url: https://api.together.xyz/v1
+      base_url: https://api.together.xyz/v1
       api_key: ${env.TOGETHER_API_KEY:=}
   vector_io:
   - provider_id: sqlite-vec
@@ -11,7 +11,7 @@ providers:
   - provider_id: vllm-inference
     provider_type: remote::vllm
     config:
-      url: ${env.VLLM_URL:=http://localhost:8000/v1}
+      base_url: ${env.VLLM_URL:=}
       max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
       api_token: ${env.VLLM_API_TOKEN:=fake}
       tls_verify: ${env.VLLM_TLS_VERIFY:=true}
@@ -17,32 +17,32 @@ providers:
   - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
     provider_type: remote::cerebras
     config:
-      base_url: https://api.cerebras.ai
+      base_url: https://api.cerebras.ai/v1
       api_key: ${env.CEREBRAS_API_KEY:=}
   - provider_id: ${env.OLLAMA_URL:+ollama}
     provider_type: remote::ollama
     config:
-      url: ${env.OLLAMA_URL:=http://localhost:11434}
+      base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
   - provider_id: ${env.VLLM_URL:+vllm}
     provider_type: remote::vllm
     config:
-      url: ${env.VLLM_URL:=}
+      base_url: ${env.VLLM_URL:=}
       max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
       api_token: ${env.VLLM_API_TOKEN:=fake}
       tls_verify: ${env.VLLM_TLS_VERIFY:=true}
   - provider_id: ${env.TGI_URL:+tgi}
     provider_type: remote::tgi
     config:
-      url: ${env.TGI_URL:=}
+      base_url: ${env.TGI_URL:=}
   - provider_id: fireworks
     provider_type: remote::fireworks
     config:
-      url: https://api.fireworks.ai/inference/v1
+      base_url: https://api.fireworks.ai/inference/v1
       api_key: ${env.FIREWORKS_API_KEY:=}
   - provider_id: together
     provider_type: remote::together
     config:
-      url: https://api.together.xyz/v1
+      base_url: https://api.together.xyz/v1
       api_key: ${env.TOGETHER_API_KEY:=}
   - provider_id: bedrock
     provider_type: remote::bedrock

@@ -52,9 +52,8 @@ providers:
   - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
     provider_type: remote::nvidia
     config:
-      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
       api_key: ${env.NVIDIA_API_KEY:=}
-      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
   - provider_id: openai
     provider_type: remote::openai
     config:

@@ -76,18 +75,18 @@ providers:
   - provider_id: groq
     provider_type: remote::groq
     config:
-      url: https://api.groq.com
+      base_url: https://api.groq.com/openai/v1
       api_key: ${env.GROQ_API_KEY:=}
   - provider_id: sambanova
     provider_type: remote::sambanova
     config:
-      url: https://api.sambanova.ai/v1
+      base_url: https://api.sambanova.ai/v1
       api_key: ${env.SAMBANOVA_API_KEY:=}
   - provider_id: ${env.AZURE_API_KEY:+azure}
     provider_type: remote::azure
     config:
       api_key: ${env.AZURE_API_KEY:=}
-      api_base: ${env.AZURE_API_BASE:=}
+      base_url: ${env.AZURE_API_BASE:=}
       api_version: ${env.AZURE_API_VERSION:=}
       api_type: ${env.AZURE_API_TYPE:=}
   - provider_id: sentence-transformers
@@ -17,32 +17,32 @@ providers:
   - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
     provider_type: remote::cerebras
     config:
-      base_url: https://api.cerebras.ai
+      base_url: https://api.cerebras.ai/v1
       api_key: ${env.CEREBRAS_API_KEY:=}
   - provider_id: ${env.OLLAMA_URL:+ollama}
     provider_type: remote::ollama
     config:
-      url: ${env.OLLAMA_URL:=http://localhost:11434}
+      base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
   - provider_id: ${env.VLLM_URL:+vllm}
     provider_type: remote::vllm
     config:
-      url: ${env.VLLM_URL:=}
+      base_url: ${env.VLLM_URL:=}
       max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
       api_token: ${env.VLLM_API_TOKEN:=fake}
       tls_verify: ${env.VLLM_TLS_VERIFY:=true}
   - provider_id: ${env.TGI_URL:+tgi}
     provider_type: remote::tgi
     config:
-      url: ${env.TGI_URL:=}
+      base_url: ${env.TGI_URL:=}
   - provider_id: fireworks
     provider_type: remote::fireworks
     config:
-      url: https://api.fireworks.ai/inference/v1
+      base_url: https://api.fireworks.ai/inference/v1
       api_key: ${env.FIREWORKS_API_KEY:=}
   - provider_id: together
     provider_type: remote::together
     config:
-      url: https://api.together.xyz/v1
+      base_url: https://api.together.xyz/v1
       api_key: ${env.TOGETHER_API_KEY:=}
   - provider_id: bedrock
     provider_type: remote::bedrock

@@ -52,9 +52,8 @@ providers:
   - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
     provider_type: remote::nvidia
     config:
-      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
       api_key: ${env.NVIDIA_API_KEY:=}
-      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
   - provider_id: openai
     provider_type: remote::openai
     config:

@@ -76,18 +75,18 @@ providers:
   - provider_id: groq
     provider_type: remote::groq
     config:
-      url: https://api.groq.com
+      base_url: https://api.groq.com/openai/v1
       api_key: ${env.GROQ_API_KEY:=}
   - provider_id: sambanova
     provider_type: remote::sambanova
     config:
-      url: https://api.sambanova.ai/v1
+      base_url: https://api.sambanova.ai/v1
       api_key: ${env.SAMBANOVA_API_KEY:=}
   - provider_id: ${env.AZURE_API_KEY:+azure}
     provider_type: remote::azure
     config:
       api_key: ${env.AZURE_API_KEY:=}
-      api_base: ${env.AZURE_API_BASE:=}
+      base_url: ${env.AZURE_API_BASE:=}
       api_version: ${env.AZURE_API_VERSION:=}
       api_type: ${env.AZURE_API_TYPE:=}
   - provider_id: sentence-transformers
|
|
@@ -17,32 +17,32 @@ providers:
   - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
     provider_type: remote::cerebras
     config:
-      base_url: https://api.cerebras.ai
+      base_url: https://api.cerebras.ai/v1
       api_key: ${env.CEREBRAS_API_KEY:=}
   - provider_id: ${env.OLLAMA_URL:+ollama}
     provider_type: remote::ollama
     config:
-      url: ${env.OLLAMA_URL:=http://localhost:11434}
+      base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
   - provider_id: ${env.VLLM_URL:+vllm}
     provider_type: remote::vllm
     config:
-      url: ${env.VLLM_URL:=}
+      base_url: ${env.VLLM_URL:=}
       max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
       api_token: ${env.VLLM_API_TOKEN:=fake}
       tls_verify: ${env.VLLM_TLS_VERIFY:=true}
   - provider_id: ${env.TGI_URL:+tgi}
     provider_type: remote::tgi
     config:
-      url: ${env.TGI_URL:=}
+      base_url: ${env.TGI_URL:=}
   - provider_id: fireworks
     provider_type: remote::fireworks
     config:
-      url: https://api.fireworks.ai/inference/v1
+      base_url: https://api.fireworks.ai/inference/v1
       api_key: ${env.FIREWORKS_API_KEY:=}
   - provider_id: together
     provider_type: remote::together
     config:
-      url: https://api.together.xyz/v1
+      base_url: https://api.together.xyz/v1
       api_key: ${env.TOGETHER_API_KEY:=}
   - provider_id: bedrock
     provider_type: remote::bedrock
@@ -52,9 +52,8 @@ providers:
   - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
     provider_type: remote::nvidia
     config:
-      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
       api_key: ${env.NVIDIA_API_KEY:=}
-      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
   - provider_id: openai
     provider_type: remote::openai
     config:
@@ -76,18 +75,18 @@ providers:
   - provider_id: groq
     provider_type: remote::groq
     config:
-      url: https://api.groq.com
+      base_url: https://api.groq.com/openai/v1
       api_key: ${env.GROQ_API_KEY:=}
   - provider_id: sambanova
     provider_type: remote::sambanova
     config:
-      url: https://api.sambanova.ai/v1
+      base_url: https://api.sambanova.ai/v1
       api_key: ${env.SAMBANOVA_API_KEY:=}
   - provider_id: ${env.AZURE_API_KEY:+azure}
     provider_type: remote::azure
     config:
       api_key: ${env.AZURE_API_KEY:=}
-      api_base: ${env.AZURE_API_BASE:=}
+      base_url: ${env.AZURE_API_BASE:=}
       api_version: ${env.AZURE_API_VERSION:=}
       api_type: ${env.AZURE_API_TYPE:=}
   - provider_id: sentence-transformers
@@ -17,32 +17,32 @@ providers:
   - provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
     provider_type: remote::cerebras
     config:
-      base_url: https://api.cerebras.ai
+      base_url: https://api.cerebras.ai/v1
       api_key: ${env.CEREBRAS_API_KEY:=}
   - provider_id: ${env.OLLAMA_URL:+ollama}
     provider_type: remote::ollama
     config:
-      url: ${env.OLLAMA_URL:=http://localhost:11434}
+      base_url: ${env.OLLAMA_URL:=http://localhost:11434/v1}
   - provider_id: ${env.VLLM_URL:+vllm}
     provider_type: remote::vllm
     config:
-      url: ${env.VLLM_URL:=}
+      base_url: ${env.VLLM_URL:=}
       max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
       api_token: ${env.VLLM_API_TOKEN:=fake}
       tls_verify: ${env.VLLM_TLS_VERIFY:=true}
   - provider_id: ${env.TGI_URL:+tgi}
     provider_type: remote::tgi
     config:
-      url: ${env.TGI_URL:=}
+      base_url: ${env.TGI_URL:=}
   - provider_id: fireworks
     provider_type: remote::fireworks
     config:
-      url: https://api.fireworks.ai/inference/v1
+      base_url: https://api.fireworks.ai/inference/v1
       api_key: ${env.FIREWORKS_API_KEY:=}
   - provider_id: together
     provider_type: remote::together
     config:
-      url: https://api.together.xyz/v1
+      base_url: https://api.together.xyz/v1
       api_key: ${env.TOGETHER_API_KEY:=}
   - provider_id: bedrock
     provider_type: remote::bedrock
@@ -52,9 +52,8 @@ providers:
   - provider_id: ${env.NVIDIA_API_KEY:+nvidia}
     provider_type: remote::nvidia
     config:
-      url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
+      base_url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}
       api_key: ${env.NVIDIA_API_KEY:=}
-      append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
   - provider_id: openai
     provider_type: remote::openai
     config:
@@ -76,18 +75,18 @@ providers:
   - provider_id: groq
     provider_type: remote::groq
     config:
-      url: https://api.groq.com
+      base_url: https://api.groq.com/openai/v1
       api_key: ${env.GROQ_API_KEY:=}
   - provider_id: sambanova
     provider_type: remote::sambanova
     config:
-      url: https://api.sambanova.ai/v1
+      base_url: https://api.sambanova.ai/v1
       api_key: ${env.SAMBANOVA_API_KEY:=}
   - provider_id: ${env.AZURE_API_KEY:+azure}
     provider_type: remote::azure
     config:
       api_key: ${env.AZURE_API_KEY:=}
-      api_base: ${env.AZURE_API_BASE:=}
+      base_url: ${env.AZURE_API_BASE:=}
       api_version: ${env.AZURE_API_VERSION:=}
       api_type: ${env.AZURE_API_TYPE:=}
   - provider_id: sentence-transformers
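The same three hunks repeat for each bundled distribution's run.yaml, so the environment-variable syntax is worth spelling out: `${env.VAR:=default}` substitutes a default when `VAR` is unset, while `${env.VAR:+value}` expands to `value` only when `VAR` is set, which is what conditionally enables a provider. A minimal sketch with a made-up provider (`remote::example` is not a real provider type):

```yaml
providers:
  inference:
  # registered only when EXAMPLE_API_KEY is set, thanks to the :+ expansion
  - provider_id: ${env.EXAMPLE_API_KEY:+example}
    provider_type: remote::example
    config:
      # falls back to the default URL when EXAMPLE_BASE_URL is unset
      base_url: ${env.EXAMPLE_BASE_URL:=https://api.example.com/v1}
      api_key: ${env.EXAMPLE_API_KEY:=}
```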
@@ -15,7 +15,7 @@ providers:
   - provider_id: watsonx
     provider_type: remote::watsonx
     config:
-      url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
+      base_url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
       api_key: ${env.WATSONX_API_KEY:=}
       project_id: ${env.WATSONX_PROJECT_ID:=}
   vector_io:
@@ -23,12 +23,14 @@ async def get_provider_impl(
         config,
         deps[Api.inference],
         deps[Api.vector_io],
-        deps[Api.safety],
+        deps.get(Api.safety),
         deps[Api.tool_runtime],
         deps[Api.tool_groups],
         deps[Api.conversations],
+        deps[Api.prompts],
+        deps[Api.files],
         policy,
         telemetry_enabled,
     )
     await impl.initialize()
     return impl
@@ -12,6 +12,7 @@ from llama_stack.providers.utils.responses.responses_store import ResponsesStore
 from llama_stack_api import (
     Agents,
     Conversations,
+    Files,
     Inference,
     ListOpenAIResponseInputItem,
     ListOpenAIResponseObject,
@@ -22,6 +23,7 @@ from llama_stack_api import (
     OpenAIResponsePrompt,
     OpenAIResponseText,
     Order,
+    Prompts,
     ResponseGuardrail,
     Safety,
     ToolGroups,
@@ -41,10 +43,12 @@ class MetaReferenceAgentsImpl(Agents):
         config: MetaReferenceAgentsImplConfig,
         inference_api: Inference,
         vector_io_api: VectorIO,
-        safety_api: Safety,
+        safety_api: Safety | None,
         tool_runtime_api: ToolRuntime,
         tool_groups_api: ToolGroups,
         conversations_api: Conversations,
+        prompts_api: Prompts,
+        files_api: Files,
         policy: list[AccessRule],
         telemetry_enabled: bool = False,
     ):
@@ -56,7 +60,8 @@ class MetaReferenceAgentsImpl(Agents):
         self.tool_groups_api = tool_groups_api
         self.conversations_api = conversations_api
         self.telemetry_enabled = telemetry_enabled
-
+        self.prompts_api = prompts_api
+        self.files_api = files_api
         self.in_memory_store = InmemoryKVStoreImpl()
         self.openai_responses_impl: OpenAIResponsesImpl | None = None
         self.policy = policy
@@ -73,6 +78,8 @@ class MetaReferenceAgentsImpl(Agents):
             vector_io_api=self.vector_io_api,
             safety_api=self.safety_api,
             conversations_api=self.conversations_api,
+            prompts_api=self.prompts_api,
+            files_api=self.files_api,
         )

     async def shutdown(self) -> None:
@@ -4,6 +4,7 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+import re
 import time
 import uuid
 from collections.abc import AsyncIterator
@@ -18,13 +19,17 @@ from llama_stack.providers.utils.responses.responses_store import (
 from llama_stack_api import (
     ConversationItem,
     Conversations,
+    Files,
     Inference,
     InvalidConversationIdError,
     ListOpenAIResponseInputItem,
     ListOpenAIResponseObject,
+    OpenAIChatCompletionContentPartParam,
     OpenAIDeleteResponseObject,
     OpenAIMessageParam,
     OpenAIResponseInput,
+    OpenAIResponseInputMessageContentFile,
+    OpenAIResponseInputMessageContentImage,
     OpenAIResponseInputMessageContentText,
     OpenAIResponseInputTool,
     OpenAIResponseMessage,
@@ -34,7 +39,9 @@ from llama_stack_api import (
     OpenAIResponseText,
     OpenAIResponseTextFormat,
     OpenAISystemMessageParam,
+    OpenAIUserMessageParam,
     Order,
+    Prompts,
     ResponseGuardrailSpec,
     Safety,
     ToolGroups,
@@ -46,6 +53,7 @@ from .streaming import StreamingResponseOrchestrator
 from .tool_executor import ToolExecutor
 from .types import ChatCompletionContext, ToolContext
 from .utils import (
+    convert_response_content_to_chat_content,
     convert_response_input_to_chat_messages,
     convert_response_text_to_chat_response_format,
     extract_guardrail_ids,
@@ -67,8 +75,10 @@ class OpenAIResponsesImpl:
         tool_runtime_api: ToolRuntime,
         responses_store: ResponsesStore,
         vector_io_api: VectorIO,  # VectorIO
-        safety_api: Safety,
+        safety_api: Safety | None,
         conversations_api: Conversations,
+        prompts_api: Prompts,
+        files_api: Files,
     ):
         self.inference_api = inference_api
         self.tool_groups_api = tool_groups_api
@@ -82,6 +92,8 @@ class OpenAIResponsesImpl:
             tool_runtime_api=tool_runtime_api,
             vector_io_api=vector_io_api,
         )
+        self.prompts_api = prompts_api
+        self.files_api = files_api

     async def _prepend_previous_response(
         self,
@@ -122,11 +134,13 @@ class OpenAIResponsesImpl:
             # Use stored messages directly and convert only new input
             message_adapter = TypeAdapter(list[OpenAIMessageParam])
             messages = message_adapter.validate_python(previous_response.messages)
-            new_messages = await convert_response_input_to_chat_messages(input, previous_messages=messages)
+            new_messages = await convert_response_input_to_chat_messages(
+                input, previous_messages=messages, files_api=self.files_api
+            )
             messages.extend(new_messages)
         else:
             # Backward compatibility: reconstruct from inputs
-            messages = await convert_response_input_to_chat_messages(all_input)
+            messages = await convert_response_input_to_chat_messages(all_input, files_api=self.files_api)

             tool_context.recover_tools_from_previous_response(previous_response)
         elif conversation is not None:
@@ -138,7 +152,7 @@ class OpenAIResponsesImpl:
             all_input = input
             if not conversation_items.data:
                 # First turn - just convert the new input
                messages = await convert_response_input_to_chat_messages(input)
-                messages = await convert_response_input_to_chat_messages(input)
+                messages = await convert_response_input_to_chat_messages(input, files_api=self.files_api)
             else:
                 if not stored_messages:
                     all_input = conversation_items.data
@@ -154,14 +168,82 @@ class OpenAIResponsesImpl:
                 all_input = input

             messages = stored_messages or []
-            new_messages = await convert_response_input_to_chat_messages(all_input, previous_messages=messages)
+            new_messages = await convert_response_input_to_chat_messages(
+                all_input, previous_messages=messages, files_api=self.files_api
+            )
             messages.extend(new_messages)
         else:
             all_input = input
-            messages = await convert_response_input_to_chat_messages(all_input)
+            messages = await convert_response_input_to_chat_messages(all_input, files_api=self.files_api)

         return all_input, messages, tool_context

+    async def _prepend_prompt(
+        self,
+        messages: list[OpenAIMessageParam],
+        openai_response_prompt: OpenAIResponsePrompt | None,
+    ) -> None:
+        """Prepend prompt template to messages, resolving text/image/file variables.
+
+        :param messages: List of OpenAIMessageParam objects
+        :param openai_response_prompt: (Optional) OpenAIResponsePrompt object with variables
+        :returns: None; the messages list is modified in place
+        """
+        if not openai_response_prompt or not openai_response_prompt.id:
+            return
+
+        prompt_version = int(openai_response_prompt.version) if openai_response_prompt.version else None
+        cur_prompt = await self.prompts_api.get_prompt(openai_response_prompt.id, prompt_version)
+
+        if not cur_prompt or not cur_prompt.prompt:
+            return
+
+        cur_prompt_text = cur_prompt.prompt
+        cur_prompt_variables = cur_prompt.variables
+
+        if not openai_response_prompt.variables:
+            messages.insert(0, OpenAISystemMessageParam(content=cur_prompt_text))
+            return
+
+        # Validate that all provided variables exist in the prompt
+        for name in openai_response_prompt.variables.keys():
+            if name not in cur_prompt_variables:
+                raise ValueError(f"Variable {name} not found in prompt {openai_response_prompt.id}")
+
+        # Separate text and media variables
+        text_substitutions = {}
+        media_content_parts: list[OpenAIChatCompletionContentPartParam] = []
+
+        for name, value in openai_response_prompt.variables.items():
+            # Text variable found
+            if isinstance(value, OpenAIResponseInputMessageContentText):
+                text_substitutions[name] = value.text
+
+            # Media variable found
+            elif isinstance(value, OpenAIResponseInputMessageContentImage | OpenAIResponseInputMessageContentFile):
+                converted_parts = await convert_response_content_to_chat_content([value], files_api=self.files_api)
+                if isinstance(converted_parts, list):
+                    media_content_parts.extend(converted_parts)
+
+                # Eg: {{product_photo}} becomes "[Image: product_photo]"
+                # This gives the model textual context about what media exists in the prompt
+                var_type = value.type.replace("input_", "").replace("_", " ").title()
+                text_substitutions[name] = f"[{var_type}: {name}]"
+
+        def replace_variable(match: re.Match[str]) -> str:
+            var_name = match.group(1).strip()
+            return str(text_substitutions.get(var_name, match.group(0)))
+
+        pattern = r"\{\{\s*(\w+)\s*\}\}"
+        processed_prompt_text = re.sub(pattern, replace_variable, cur_prompt_text)
+
+        # Insert system message with resolved text
+        messages.insert(0, OpenAISystemMessageParam(content=processed_prompt_text))
+
+        # If we have media, append a new user message, since user content can carry images and files
+        if media_content_parts:
+            messages.append(OpenAIUserMessageParam(content=media_content_parts))
+
     async def get_openai_response(
         self,
         response_id: str,
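The template resolution in `_prepend_prompt` reduces to a regex substitution over `{{name}}` placeholders, leaving unknown names untouched. A standalone sketch of just that step (plain Python, no Stack types):

```python
import re

def resolve_template(template: str, substitutions: dict[str, str]) -> str:
    """Replace {{name}} placeholders; unknown names are left as-is."""
    pattern = r"\{\{\s*(\w+)\s*\}\}"
    return re.sub(pattern, lambda m: substitutions.get(m.group(1), m.group(0)), template)

print(resolve_template(
    "Describe {{product_photo}} for {{audience}}.",
    {"product_photo": "[Image: product_photo]", "audience": "new users"},
))
# Describe [Image: product_photo] for new users.
```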
@@ -273,6 +355,14 @@ class OpenAIResponsesImpl:

         guardrail_ids = extract_guardrail_ids(guardrails) if guardrails else []

+        # Validate that Safety API is available if guardrails are requested
+        if guardrail_ids and self.safety_api is None:
+            raise ValueError(
+                "Cannot process guardrails: Safety API is not configured.\n\n"
+                "To use guardrails, ensure the Safety API is configured in your stack, or remove "
+                "the 'guardrails' parameter from your request."
+            )
+
         if conversation is not None:
             if previous_response_id is not None:
                 raise ValueError(
@@ -289,6 +379,7 @@ class OpenAIResponsesImpl:
             input=input,
             conversation=conversation,
             model=model,
+            prompt=prompt,
             instructions=instructions,
             previous_response_id=previous_response_id,
             store=store,
@@ -342,6 +433,7 @@ class OpenAIResponsesImpl:
         instructions: str | None = None,
         previous_response_id: str | None = None,
         conversation: str | None = None,
+        prompt: OpenAIResponsePrompt | None = None,
         store: bool | None = True,
         temperature: float | None = None,
         text: OpenAIResponseText | None = None,
@@ -364,6 +456,9 @@ class OpenAIResponsesImpl:
         if instructions:
             messages.insert(0, OpenAISystemMessageParam(content=instructions))

+        # Prepend reusable prompt (if provided)
+        await self._prepend_prompt(messages, prompt)
+
         # Structured outputs
         response_format = await convert_response_text_to_chat_response_format(text)

@@ -386,6 +481,7 @@ class OpenAIResponsesImpl:
             ctx=ctx,
             response_id=response_id,
             created_at=created_at,
+            prompt=prompt,
             text=text,
             max_infer_iters=max_infer_iters,
             parallel_tool_calls=parallel_tool_calls,
@@ -66,6 +66,8 @@ from llama_stack_api import (
     OpenAIResponseUsage,
     OpenAIResponseUsageInputTokensDetails,
     OpenAIResponseUsageOutputTokensDetails,
+    OpenAIToolMessageParam,
+    Safety,
     WebSearchToolTypes,
 )

@@ -111,7 +113,7 @@ class StreamingResponseOrchestrator:
         max_infer_iters: int,
         tool_executor,  # Will be the tool execution logic from the main class
         instructions: str | None,
-        safety_api,
+        safety_api: Safety | None,
         guardrail_ids: list[str] | None = None,
         prompt: OpenAIResponsePrompt | None = None,
         parallel_tool_calls: bool | None = None,
@@ -905,10 +907,16 @@ class StreamingResponseOrchestrator:
         """Coordinate execution of both function and non-function tool calls."""
         # Execute non-function tool calls
         for tool_call in non_function_tool_calls:
-            # Check if total calls made to built-in and mcp tools exceed max_tool_calls
+            # if total calls made to built-in and mcp tools exceed max_tool_calls
+            # then create a tool response message indicating the call was skipped
             if self.max_tool_calls is not None and self.accumulated_builtin_tool_calls >= self.max_tool_calls:
                 logger.info(f"Ignoring built-in and mcp tool call since reached the limit of {self.max_tool_calls=}.")
-                break
+                skipped_call_message = OpenAIToolMessageParam(
+                    content=f"Tool call skipped: maximum tool calls limit ({self.max_tool_calls}) reached.",
+                    tool_call_id=tool_call.id,
+                )
+                next_turn_messages.append(skipped_call_message)
+                continue

             # Find the item_id for this tool call
             matching_item_id = None
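The behavioral change in the hunk above: once the tool-call budget is spent, each remaining built-in/MCP call gets an explicit tool message instead of being silently dropped by a `break`, so the model can see why nothing ran. A standalone sketch of the pattern (`ToolMessage` is a stand-in for the Stack's `OpenAIToolMessageParam`):

```python
from dataclasses import dataclass

@dataclass
class ToolMessage:  # stand-in for OpenAIToolMessageParam
    content: str
    tool_call_id: str

def run_tool_calls(call_ids: list[str], max_tool_calls: int | None) -> list[ToolMessage]:
    messages, used = [], 0
    for call_id in call_ids:
        if max_tool_calls is not None and used >= max_tool_calls:
            # budget exhausted: record a skip notice and keep iterating
            messages.append(ToolMessage(
                content=f"Tool call skipped: maximum tool calls limit ({max_tool_calls}) reached.",
                tool_call_id=call_id,
            ))
            continue
        used += 1
        messages.append(ToolMessage(content="<tool result>", tool_call_id=call_id))
    return messages

print(run_tool_calls(["a", "b", "c"], max_tool_calls=2))  # third call is skipped, not dropped
```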
@@ -5,11 +5,14 @@
 # the root directory of this source tree.

 import asyncio
+import base64
+import mimetypes
 import re
 import uuid
 from collections.abc import Sequence

 from llama_stack_api import (
+    Files,
     OpenAIAssistantMessageParam,
     OpenAIChatCompletionContentPartImageParam,
     OpenAIChatCompletionContentPartParam,
@@ -18,6 +21,8 @@ from llama_stack_api import (
     OpenAIChatCompletionToolCallFunction,
     OpenAIChoice,
     OpenAIDeveloperMessageParam,
+    OpenAIFile,
+    OpenAIFileFile,
     OpenAIImageURL,
     OpenAIJSONSchema,
     OpenAIMessageParam,
@@ -29,6 +34,7 @@ from llama_stack_api import (
     OpenAIResponseInput,
     OpenAIResponseInputFunctionToolCallOutput,
     OpenAIResponseInputMessageContent,
+    OpenAIResponseInputMessageContentFile,
     OpenAIResponseInputMessageContentImage,
     OpenAIResponseInputMessageContentText,
     OpenAIResponseInputTool,
@@ -37,9 +43,11 @@ from llama_stack_api import (
     OpenAIResponseMessage,
     OpenAIResponseOutputMessageContent,
     OpenAIResponseOutputMessageContentOutputText,
+    OpenAIResponseOutputMessageFileSearchToolCall,
     OpenAIResponseOutputMessageFunctionToolCall,
     OpenAIResponseOutputMessageMCPCall,
     OpenAIResponseOutputMessageMCPListTools,
+    OpenAIResponseOutputMessageWebSearchToolCall,
     OpenAIResponseText,
     OpenAISystemMessageParam,
     OpenAIToolMessageParam,
@@ -49,6 +57,46 @@ from llama_stack_api import (
 )


+async def extract_bytes_from_file(file_id: str, files_api: Files) -> bytes:
+    """
+    Extract raw bytes from a file using the Files API.
+
+    :param file_id: The file identifier (e.g., "file-abc123")
+    :param files_api: Files API instance
+    :returns: Raw file content as bytes
+    :raises: ValueError if file cannot be retrieved
+    """
+    try:
+        response = await files_api.openai_retrieve_file_content(file_id)
+        return bytes(response.body)
+    except Exception as e:
+        raise ValueError(f"Failed to retrieve file content for file_id '{file_id}': {str(e)}") from e
+
+
+def generate_base64_ascii_text_from_bytes(raw_bytes: bytes) -> str:
+    """
+    Convert raw binary bytes into a safe ASCII text representation for URLs.
+
+    :param raw_bytes: the actual bytes that represent file content
+    :returns: base64-encoded ASCII string
+    """
+    return base64.b64encode(raw_bytes).decode("utf-8")
+
+
+def construct_data_url(ascii_text: str, mime_type: str | None) -> str:
+    """
+    Construct a data URL with the encoded data inside.
+
+    :param ascii_text: base64-encoded ASCII content
+    :param mime_type: MIME type of the file
+    :returns: data URL string (e.g. data:image/png;base64,iVBORw0KGgo...)
+    """
+    if not mime_type:
+        mime_type = "application/octet-stream"
+
+    return f"data:{mime_type};base64,{ascii_text}"
+
+
 async def convert_chat_choice_to_response_message(
     choice: OpenAIChoice,
     citation_files: dict[str, str] | None = None,
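A quick standalone check of the encode-and-wrap flow these helpers implement, using only the standard library (the file name and bytes here are made up):

```python
import base64
import mimetypes

raw_bytes = b"%PDF-1.7 ..."   # pretend file content
filename = "report.pdf"       # hypothetical upload name

mime_type, _ = mimetypes.guess_type(filename)            # "application/pdf"
ascii_text = base64.b64encode(raw_bytes).decode("utf-8")
data_url = f"data:{mime_type or 'application/octet-stream'};base64,{ascii_text}"
print(data_url)  # data:application/pdf;base64,JVBERi0xLjcgLi4u
```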
@@ -78,11 +126,15 @@ async def convert_chat_choice_to_response_message(

 async def convert_response_content_to_chat_content(
     content: str | Sequence[OpenAIResponseInputMessageContent | OpenAIResponseOutputMessageContent],
+    files_api: Files | None,
 ) -> str | list[OpenAIChatCompletionContentPartParam]:
     """
     Convert the content parts from an OpenAI Response API request into OpenAI Chat Completion content parts.

     The content schemas of each API look similar, but are not exactly the same.
+
+    :param content: The content to convert
+    :param files_api: Files API for resolving file_id to raw file content (required if content contains files/images)
     """
     if isinstance(content, str):
         return content
@@ -95,9 +147,68 @@ async def convert_response_content_to_chat_content(
         elif isinstance(content_part, OpenAIResponseOutputMessageContentOutputText):
             converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part.text))
         elif isinstance(content_part, OpenAIResponseInputMessageContentImage):
+            detail = content_part.detail
+            image_mime_type = None
             if content_part.image_url:
-                image_url = OpenAIImageURL(url=content_part.image_url, detail=content_part.detail)
+                image_url = OpenAIImageURL(url=content_part.image_url, detail=detail)
                 converted_parts.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url))
+            elif content_part.file_id:
+                if files_api is None:
+                    raise ValueError("file_ids are not supported by this implementation of the Stack")
+                image_file_response = await files_api.openai_retrieve_file(content_part.file_id)
+                if image_file_response.filename:
+                    image_mime_type, _ = mimetypes.guess_type(image_file_response.filename)
+                raw_image_bytes = await extract_bytes_from_file(content_part.file_id, files_api)
+                ascii_text = generate_base64_ascii_text_from_bytes(raw_image_bytes)
+                image_data_url = construct_data_url(ascii_text, image_mime_type)
+                image_url = OpenAIImageURL(url=image_data_url, detail=detail)
+                converted_parts.append(OpenAIChatCompletionContentPartImageParam(image_url=image_url))
+            else:
+                raise ValueError(
+                    f"Image content must have either 'image_url' or 'file_id'. "
+                    f"Got image_url={content_part.image_url}, file_id={content_part.file_id}"
+                )
+        elif isinstance(content_part, OpenAIResponseInputMessageContentFile):
+            resolved_file_data = None
+            file_data = content_part.file_data
+            file_id = content_part.file_id
+            file_url = content_part.file_url
+            filename = content_part.filename
+            file_mime_type = None
+            if not any([file_data, file_id, file_url]):
+                raise ValueError(
+                    f"File content must have at least one of 'file_data', 'file_id', or 'file_url'. "
+                    f"Got file_data={file_data}, file_id={file_id}, file_url={file_url}"
+                )
+            if file_id:
+                if files_api is None:
+                    raise ValueError("file_ids are not supported by this implementation of the Stack")
+
+                file_response = await files_api.openai_retrieve_file(file_id)
+                if not filename:
+                    filename = file_response.filename
+                file_mime_type, _ = mimetypes.guess_type(file_response.filename)
+                raw_file_bytes = await extract_bytes_from_file(file_id, files_api)
+                ascii_text = generate_base64_ascii_text_from_bytes(raw_file_bytes)
+                resolved_file_data = construct_data_url(ascii_text, file_mime_type)
+            elif file_data:
+                if file_data.startswith("data:"):
+                    resolved_file_data = file_data
+                else:
+                    # Raw base64 data, wrap in data URL format
+                    if filename:
+                        file_mime_type, _ = mimetypes.guess_type(filename)
+                    resolved_file_data = construct_data_url(file_data, file_mime_type)
+            elif file_url:
+                resolved_file_data = file_url
+            converted_parts.append(
+                OpenAIFile(
+                    file=OpenAIFileFile(
+                        file_data=resolved_file_data,
+                        filename=filename,
+                    )
+                )
+            )
         elif isinstance(content_part, str):
             converted_parts.append(OpenAIChatCompletionContentPartTextParam(text=content_part))
         else:
@@ -110,12 +221,14 @@ async def convert_response_content_to_chat_content(

 async def convert_response_input_to_chat_messages(
     input: str | list[OpenAIResponseInput],
     previous_messages: list[OpenAIMessageParam] | None = None,
+    files_api: Files | None = None,
 ) -> list[OpenAIMessageParam]:
     """
     Convert the input from an OpenAI Response API request into OpenAI Chat Completion messages.

     :param input: The input to convert
     :param previous_messages: Optional previous messages to check for function_call references
+    :param files_api: Files API for resolving file_id to raw file content (optional, required for file/image content)
     """
     messages: list[OpenAIMessageParam] = []
     if isinstance(input, list):
@@ -169,6 +282,12 @@ async def convert_response_input_to_chat_messages(
             elif isinstance(input_item, OpenAIResponseOutputMessageMCPListTools):
                 # the tool list will be handled separately
                 pass
+            elif isinstance(
+                input_item,
+                OpenAIResponseOutputMessageWebSearchToolCall | OpenAIResponseOutputMessageFileSearchToolCall,
+            ):
+                # these tool calls are tracked internally but not converted to chat messages
+                pass
             elif isinstance(input_item, OpenAIResponseMCPApprovalRequest) or isinstance(
                 input_item, OpenAIResponseMCPApprovalResponse
             ):
@@ -176,7 +295,7 @@ async def convert_response_input_to_chat_messages(
                 pass
             elif isinstance(input_item, OpenAIResponseMessage):
                 # Narrow type to OpenAIResponseMessage which has content and role attributes
-                content = await convert_response_content_to_chat_content(input_item.content)
+                content = await convert_response_content_to_chat_content(input_item.content, files_api)
                 message_type = await get_message_type_by_role(input_item.role)
                 if message_type is None:
                     raise ValueError(
@@ -320,11 +439,15 @@ def is_function_tool_call(
     return False


-async def run_guardrails(safety_api: Safety, messages: str, guardrail_ids: list[str]) -> str | None:
+async def run_guardrails(safety_api: Safety | None, messages: str, guardrail_ids: list[str]) -> str | None:
     """Run guardrails against messages and return violation message if blocked."""
     if not messages:
         return None

+    # If safety API is not available, skip guardrails
+    if safety_api is None:
+        return None
+
     # Look up shields to get their provider_resource_id (actual model ID)
     model_ids = []
     # TODO: list_shields not in Safety interface but available at runtime via API routing
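Net effect of this hunk: guardrail evaluation now degrades gracefully when no Safety provider is wired in. The guard-clause shape, as a tiny standalone sketch (names illustrative):

```python
async def run_guardrails(safety_api, messages: str, guardrail_ids: list[str]) -> str | None:
    if not messages:
        return None
    if safety_api is None:
        # no Safety provider configured: skip moderation rather than crash
        return None
    # ... resolve shields and call the moderation endpoint ...
    return None  # None means "not blocked"
```

This pairs with the earlier validation in the create-response path, which rejects requests that explicitly ask for guardrails when no Safety API exists; the silent skip here only covers the default path.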
@@ -30,11 +30,15 @@ def available_providers() -> list[ProviderSpec]:
             config_class="llama_stack.providers.inline.agents.meta_reference.MetaReferenceAgentsImplConfig",
             api_dependencies=[
                 Api.inference,
-                Api.safety,
                 Api.vector_io,
                 Api.tool_runtime,
                 Api.tool_groups,
                 Api.conversations,
+                Api.prompts,
+                Api.files,
             ],
+            optional_api_dependencies=[
+                Api.safety,
+            ],
             description="Meta's reference implementation of an agent system that can use tools, access vector databases, and perform complex reasoning tasks.",
         ),
@@ -4,8 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from urllib.parse import urljoin
-
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

 from .config import AzureConfig
@@ -22,4 +20,4 @@ class AzureInferenceAdapter(OpenAIMixin):

         Returns the Azure API base URL from the configuration.
         """
-        return urljoin(str(self.config.api_base), "/openai/v1")
+        return str(self.config.base_url)
@@ -32,8 +32,9 @@ class AzureProviderDataValidator(BaseModel):

 @json_schema_type
 class AzureConfig(RemoteInferenceProviderConfig):
-    api_base: HttpUrl = Field(
-        description="Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com)",
+    base_url: HttpUrl | None = Field(
+        default=None,
+        description="Azure API base for Azure (e.g., https://your-resource-name.openai.azure.com/openai/v1)",
     )
     api_version: str | None = Field(
         default_factory=lambda: os.getenv("AZURE_API_VERSION"),
@@ -48,14 +49,14 @@ class AzureConfig(RemoteInferenceProviderConfig):
     def sample_run_config(
         cls,
         api_key: str = "${env.AZURE_API_KEY:=}",
-        api_base: str = "${env.AZURE_API_BASE:=}",
+        base_url: str = "${env.AZURE_API_BASE:=}",
         api_version: str = "${env.AZURE_API_VERSION:=}",
         api_type: str = "${env.AZURE_API_TYPE:=}",
         **kwargs,
     ) -> dict[str, Any]:
         return {
             "api_key": api_key,
-            "api_base": api_base,
+            "base_url": base_url,
             "api_version": api_version,
             "api_type": api_type,
         }
@@ -4,8 +4,6 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from urllib.parse import urljoin
-
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
 from llama_stack_api import (
     OpenAIEmbeddingsRequestWithExtraBody,
@@ -21,7 +19,7 @@ class CerebrasInferenceAdapter(OpenAIMixin):
     provider_data_api_key_field: str = "cerebras_api_key"

     def get_base_url(self) -> str:
-        return urljoin(self.config.base_url, "v1")
+        return str(self.config.base_url)

     async def openai_embeddings(
         self,
@@ -7,12 +7,12 @@
 import os
 from typing import Any

-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, HttpUrl

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack_api import json_schema_type

-DEFAULT_BASE_URL = "https://api.cerebras.ai"
+DEFAULT_BASE_URL = "https://api.cerebras.ai/v1"


 class CerebrasProviderDataValidator(BaseModel):
@@ -24,8 +24,8 @@ class CerebrasProviderDataValidator(BaseModel):

 @json_schema_type
 class CerebrasImplConfig(RemoteInferenceProviderConfig):
-    base_url: str = Field(
-        default=os.environ.get("CEREBRAS_BASE_URL", DEFAULT_BASE_URL),
+    base_url: HttpUrl | None = Field(
+        default=HttpUrl(os.environ.get("CEREBRAS_BASE_URL", DEFAULT_BASE_URL)),
         description="Base URL for the Cerebras API",
     )
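A quick sanity check of the `HttpUrl` pattern these configs converge on: Pydantic v2 validates the URL when the model is constructed, and the adapters wrap the field in `str(...)` before handing it to the OpenAI client. A minimal sketch with a generic model (not the Stack's actual config class):

```python
from pydantic import BaseModel, HttpUrl, ValidationError

class ExampleConfig(BaseModel):
    base_url: HttpUrl | None = None

cfg = ExampleConfig(base_url="https://api.cerebras.ai/v1")
print(str(cfg.base_url))        # https://api.cerebras.ai/v1

try:
    ExampleConfig(base_url="not-a-url")
except ValidationError as e:
    print("rejected:", e.error_count(), "error(s)")
```

This is why most adapter bodies below become one-liners: `str(self.config.base_url)` returns the validated URL, path included.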
@@ -6,7 +6,7 @@

 from typing import Any

-from pydantic import BaseModel, Field, SecretStr
+from pydantic import BaseModel, Field, HttpUrl, SecretStr

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack_api import json_schema_type
@@ -21,9 +21,9 @@ class DatabricksProviderDataValidator(BaseModel):

 @json_schema_type
 class DatabricksImplConfig(RemoteInferenceProviderConfig):
-    url: str | None = Field(
+    base_url: HttpUrl | None = Field(
         default=None,
-        description="The URL for the Databricks model serving endpoint",
+        description="The URL for the Databricks model serving endpoint (should include /serving-endpoints path)",
     )
     auth_credential: SecretStr | None = Field(
         default=None,
@@ -34,11 +34,11 @@ class DatabricksImplConfig(RemoteInferenceProviderConfig):
     @classmethod
     def sample_run_config(
         cls,
-        url: str = "${env.DATABRICKS_HOST:=}",
+        base_url: str = "${env.DATABRICKS_HOST:=}",
         api_token: str = "${env.DATABRICKS_TOKEN:=}",
         **kwargs: Any,
     ) -> dict[str, Any]:
         return {
-            "url": url,
+            "base_url": base_url,
             "api_token": api_token,
         }
@@ -29,15 +29,21 @@ class DatabricksInferenceAdapter(OpenAIMixin):
     }

     def get_base_url(self) -> str:
-        return f"{self.config.url}/serving-endpoints"
+        return str(self.config.base_url)

     async def list_provider_model_ids(self) -> Iterable[str]:
         # Filter out None values from endpoint names
         api_token = self._get_api_key_from_config_or_provider_data()
+        # WorkspaceClient expects base host without /serving-endpoints suffix
+        base_url_str = str(self.config.base_url)
+        if base_url_str.endswith("/serving-endpoints"):
+            host = base_url_str[:-18]  # Remove '/serving-endpoints'
+        else:
+            host = base_url_str
         return [
             endpoint.name  # type: ignore[misc]
             for endpoint in WorkspaceClient(
-                host=self.config.url, token=api_token
+                host=host, token=api_token
             ).serving_endpoints.list()  # TODO: this is not async
         ]
@@ -6,7 +6,7 @@

 from typing import Any

-from pydantic import Field
+from pydantic import Field, HttpUrl

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack_api import json_schema_type
@@ -14,14 +14,14 @@ from llama_stack_api import json_schema_type

 @json_schema_type
 class FireworksImplConfig(RemoteInferenceProviderConfig):
-    url: str = Field(
-        default="https://api.fireworks.ai/inference/v1",
+    base_url: HttpUrl | None = Field(
+        default=HttpUrl("https://api.fireworks.ai/inference/v1"),
         description="The URL for the Fireworks server",
     )

     @classmethod
     def sample_run_config(cls, api_key: str = "${env.FIREWORKS_API_KEY:=}", **kwargs) -> dict[str, Any]:
         return {
-            "url": "https://api.fireworks.ai/inference/v1",
+            "base_url": "https://api.fireworks.ai/inference/v1",
             "api_key": api_key,
         }
@@ -24,4 +24,4 @@ class FireworksInferenceAdapter(OpenAIMixin):
     provider_data_api_key_field: str = "fireworks_api_key"

     def get_base_url(self) -> str:
-        return "https://api.fireworks.ai/inference/v1"
+        return str(self.config.base_url)
@@ -6,7 +6,7 @@

 from typing import Any

-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, HttpUrl

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack_api import json_schema_type
@@ -21,14 +21,14 @@ class GroqProviderDataValidator(BaseModel):

 @json_schema_type
 class GroqConfig(RemoteInferenceProviderConfig):
-    url: str = Field(
-        default="https://api.groq.com",
+    base_url: HttpUrl | None = Field(
+        default=HttpUrl("https://api.groq.com/openai/v1"),
         description="The URL for the Groq AI server",
     )

     @classmethod
     def sample_run_config(cls, api_key: str = "${env.GROQ_API_KEY:=}", **kwargs) -> dict[str, Any]:
         return {
-            "url": "https://api.groq.com",
+            "base_url": "https://api.groq.com/openai/v1",
             "api_key": api_key,
         }
@@ -15,4 +15,4 @@ class GroqInferenceAdapter(OpenAIMixin):
     provider_data_api_key_field: str = "groq_api_key"

     def get_base_url(self) -> str:
-        return f"{self.config.url}/openai/v1"
+        return str(self.config.base_url)
@@ -6,7 +6,7 @@

 from typing import Any

-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, HttpUrl

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack_api import json_schema_type
@@ -21,14 +21,14 @@ class LlamaProviderDataValidator(BaseModel):

 @json_schema_type
 class LlamaCompatConfig(RemoteInferenceProviderConfig):
-    openai_compat_api_base: str = Field(
-        default="https://api.llama.com/compat/v1/",
+    base_url: HttpUrl | None = Field(
+        default=HttpUrl("https://api.llama.com/compat/v1/"),
         description="The URL for the Llama API server",
     )

     @classmethod
     def sample_run_config(cls, api_key: str = "${env.LLAMA_API_KEY}", **kwargs) -> dict[str, Any]:
         return {
-            "openai_compat_api_base": "https://api.llama.com/compat/v1/",
+            "base_url": "https://api.llama.com/compat/v1/",
             "api_key": api_key,
         }
@@ -31,7 +31,7 @@ class LlamaCompatInferenceAdapter(OpenAIMixin):

         :return: The Llama API base URL
         """
-        return self.config.openai_compat_api_base
+        return str(self.config.base_url)

     async def openai_completion(
         self,
@@ -7,7 +7,7 @@
 import os
 from typing import Any

-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, HttpUrl

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack_api import json_schema_type
@@ -44,18 +44,14 @@ class NVIDIAConfig(RemoteInferenceProviderConfig):
     URL of your running NVIDIA NIM and do not need to set the api_key.
     """

-    url: str = Field(
-        default_factory=lambda: os.getenv("NVIDIA_BASE_URL", "https://integrate.api.nvidia.com"),
+    base_url: HttpUrl | None = Field(
+        default_factory=lambda: os.getenv("NVIDIA_BASE_URL", "https://integrate.api.nvidia.com/v1"),
         description="A base url for accessing the NVIDIA NIM",
     )
     timeout: int = Field(
         default=60,
         description="Timeout for the HTTP requests",
     )
-    append_api_version: bool = Field(
-        default_factory=lambda: os.getenv("NVIDIA_APPEND_API_VERSION", "True").lower() != "false",
-        description="When set to false, the API version will not be appended to the base_url. By default, it is true.",
-    )
     rerank_model_to_url: dict[str, str] = Field(
         default_factory=lambda: {
             "nv-rerank-qa-mistral-4b:1": "https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking",
@@ -68,13 +64,11 @@ class NVIDIAConfig(RemoteInferenceProviderConfig):
     @classmethod
     def sample_run_config(
         cls,
-        url: str = "${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}",
+        base_url: HttpUrl | None = "${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com/v1}",
         api_key: str = "${env.NVIDIA_API_KEY:=}",
-        append_api_version: bool = "${env.NVIDIA_APPEND_API_VERSION:=True}",
         **kwargs,
     ) -> dict[str, Any]:
         return {
-            "url": url,
+            "base_url": base_url,
             "api_key": api_key,
-            "append_api_version": append_api_version,
         }
@@ -44,7 +44,7 @@ class NVIDIAInferenceAdapter(OpenAIMixin):
     }

     async def initialize(self) -> None:
-        logger.info(f"Initializing NVIDIAInferenceAdapter({self.config.url})...")
+        logger.info(f"Initializing NVIDIAInferenceAdapter({self.config.base_url})...")

         if _is_nvidia_hosted(self.config):
             if not self.config.auth_credential:
@@ -72,7 +72,7 @@ class NVIDIAInferenceAdapter(OpenAIMixin):

         :return: The NVIDIA API base URL
         """
-        return f"{self.config.url}/v1" if self.config.append_api_version else self.config.url
+        return str(self.config.base_url)

     async def list_provider_model_ids(self) -> Iterable[str]:
         """
@@ -8,4 +8,4 @@ from . import NVIDIAConfig


 def _is_nvidia_hosted(config: NVIDIAConfig) -> bool:
-    return "integrate.api.nvidia.com" in config.url
+    return "integrate.api.nvidia.com" in str(config.base_url)
@@ -6,20 +6,22 @@

 from typing import Any

-from pydantic import Field, SecretStr
+from pydantic import Field, HttpUrl, SecretStr

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig

-DEFAULT_OLLAMA_URL = "http://localhost:11434"
+DEFAULT_OLLAMA_URL = "http://localhost:11434/v1"


 class OllamaImplConfig(RemoteInferenceProviderConfig):
     auth_credential: SecretStr | None = Field(default=None, exclude=True)

-    url: str = DEFAULT_OLLAMA_URL
+    base_url: HttpUrl | None = Field(default=HttpUrl(DEFAULT_OLLAMA_URL))

     @classmethod
-    def sample_run_config(cls, url: str = "${env.OLLAMA_URL:=http://localhost:11434}", **kwargs) -> dict[str, Any]:
+    def sample_run_config(
+        cls, base_url: str = "${env.OLLAMA_URL:=http://localhost:11434/v1}", **kwargs
+    ) -> dict[str, Any]:
         return {
-            "url": url,
+            "base_url": base_url,
         }
@@ -55,17 +55,23 @@ class OllamaInferenceAdapter(OpenAIMixin):
         # ollama client attaches itself to the current event loop (sadly?)
         loop = asyncio.get_running_loop()
         if loop not in self._clients:
-            self._clients[loop] = AsyncOllamaClient(host=self.config.url)
+            # Ollama client expects base URL without /v1 suffix
+            base_url_str = str(self.config.base_url)
+            if base_url_str.endswith("/v1"):
+                host = base_url_str[:-3]
+            else:
+                host = base_url_str
+            self._clients[loop] = AsyncOllamaClient(host=host)
         return self._clients[loop]

     def get_api_key(self):
         return "NO KEY REQUIRED"

     def get_base_url(self):
-        return self.config.url.rstrip("/") + "/v1"
+        return str(self.config.base_url)

     async def initialize(self) -> None:
-        logger.info(f"checking connectivity to Ollama at `{self.config.url}`...")
+        logger.info(f"checking connectivity to Ollama at `{self.config.base_url}`...")
         r = await self.health()
         if r["status"] == HealthStatus.ERROR:
             logger.warning(
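Several adapters in this change (Databricks, Ollama, TGI below) now strip a known suffix before handing the URL to a client library that wants the bare host. The slice-by-length idiom (`base_url_str[:-3]`) is equivalent to Python 3.9+'s `str.removesuffix`; a hypothetical helper showing both:

```python
def strip_suffix(url: str, suffix: str) -> str:
    """Drop a trailing path segment if present (no-op otherwise)."""
    return url[: -len(suffix)] if url.endswith(suffix) else url

assert strip_suffix("http://localhost:11434/v1", "/v1") == "http://localhost:11434"
assert strip_suffix("http://localhost:11434", "/v1") == "http://localhost:11434"
# identical behavior with the stdlib method:
assert "http://localhost:11434/v1".removesuffix("/v1") == "http://localhost:11434"
```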
@@ -6,7 +6,7 @@

 from typing import Any

-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, HttpUrl

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack_api import json_schema_type
@@ -21,8 +21,8 @@ class OpenAIProviderDataValidator(BaseModel):

 @json_schema_type
 class OpenAIConfig(RemoteInferenceProviderConfig):
-    base_url: str = Field(
-        default="https://api.openai.com/v1",
+    base_url: HttpUrl | None = Field(
+        default=HttpUrl("https://api.openai.com/v1"),
         description="Base URL for OpenAI API",
     )
@@ -35,4 +35,4 @@ class OpenAIInferenceAdapter(OpenAIMixin):

         Returns the OpenAI API base URL from the configuration.
         """
-        return self.config.base_url
+        return str(self.config.base_url)
@@ -6,7 +6,7 @@

 from typing import Any

-from pydantic import Field
+from pydantic import Field, HttpUrl

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack_api import json_schema_type
@@ -14,16 +14,16 @@ from llama_stack_api import json_schema_type

 @json_schema_type
 class PassthroughImplConfig(RemoteInferenceProviderConfig):
-    url: str = Field(
+    base_url: HttpUrl | None = Field(
         default=None,
         description="The URL for the passthrough endpoint",
     )

     @classmethod
     def sample_run_config(
-        cls, url: str = "${env.PASSTHROUGH_URL}", api_key: str = "${env.PASSTHROUGH_API_KEY}", **kwargs
+        cls, base_url: HttpUrl | None = "${env.PASSTHROUGH_URL}", api_key: str = "${env.PASSTHROUGH_API_KEY}", **kwargs
     ) -> dict[str, Any]:
         return {
-            "url": url,
+            "base_url": base_url,
             "api_key": api_key,
         }
@@ -82,8 +82,8 @@ class PassthroughInferenceAdapter(NeedsRequestProviderData, Inference):

     def _get_passthrough_url(self) -> str:
         """Get the passthrough URL from config or provider data."""
-        if self.config.url is not None:
-            return self.config.url
+        if self.config.base_url is not None:
+            return str(self.config.base_url)

         provider_data = self.get_request_provider_data()
         if provider_data is None:
@@ -6,7 +6,7 @@

 from typing import Any

-from pydantic import BaseModel, Field, SecretStr
+from pydantic import BaseModel, Field, HttpUrl, SecretStr

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack_api import json_schema_type
@@ -21,7 +21,7 @@ class RunpodProviderDataValidator(BaseModel):

 @json_schema_type
 class RunpodImplConfig(RemoteInferenceProviderConfig):
-    url: str | None = Field(
+    base_url: HttpUrl | None = Field(
         default=None,
         description="The URL for the Runpod model serving endpoint",
     )
@@ -34,6 +34,6 @@ class RunpodImplConfig(RemoteInferenceProviderConfig):
     @classmethod
     def sample_run_config(cls, **kwargs: Any) -> dict[str, Any]:
         return {
-            "url": "${env.RUNPOD_URL:=}",
+            "base_url": "${env.RUNPOD_URL:=}",
             "api_token": "${env.RUNPOD_API_TOKEN}",
         }
@@ -28,7 +28,7 @@ class RunpodInferenceAdapter(OpenAIMixin):

     def get_base_url(self) -> str:
         """Get base URL for OpenAI client."""
-        return self.config.url
+        return str(self.config.base_url)

     async def openai_chat_completion(
         self,
@@ -6,7 +6,7 @@

 from typing import Any

-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, HttpUrl

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack_api import json_schema_type
@@ -21,14 +21,14 @@ class SambaNovaProviderDataValidator(BaseModel):

 @json_schema_type
 class SambaNovaImplConfig(RemoteInferenceProviderConfig):
-    url: str = Field(
-        default="https://api.sambanova.ai/v1",
+    base_url: HttpUrl | None = Field(
+        default=HttpUrl("https://api.sambanova.ai/v1"),
         description="The URL for the SambaNova AI server",
     )

     @classmethod
     def sample_run_config(cls, api_key: str = "${env.SAMBANOVA_API_KEY:=}", **kwargs) -> dict[str, Any]:
         return {
-            "url": "https://api.sambanova.ai/v1",
+            "base_url": "https://api.sambanova.ai/v1",
             "api_key": api_key,
         }
@@ -25,4 +25,4 @@ class SambaNovaInferenceAdapter(OpenAIMixin):

         :return: The SambaNova base URL
         """
-        return self.config.url
+        return str(self.config.base_url)
@@ -5,7 +5,7 @@
 # the root directory of this source tree.


-from pydantic import BaseModel, Field, SecretStr
+from pydantic import BaseModel, Field, HttpUrl, SecretStr

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack_api import json_schema_type
@@ -15,18 +15,19 @@ from llama_stack_api import json_schema_type
 class TGIImplConfig(RemoteInferenceProviderConfig):
     auth_credential: SecretStr | None = Field(default=None, exclude=True)

-    url: str = Field(
-        description="The URL for the TGI serving endpoint",
+    base_url: HttpUrl | None = Field(
+        default=None,
+        description="The URL for the TGI serving endpoint (should include /v1 path)",
     )

     @classmethod
     def sample_run_config(
         cls,
-        url: str = "${env.TGI_URL:=}",
+        base_url: str = "${env.TGI_URL:=}",
         **kwargs,
     ):
         return {
-            "url": url,
+            "base_url": base_url,
         }
@@ -8,7 +8,7 @@
 from collections.abc import Iterable

 from huggingface_hub import AsyncInferenceClient, HfApi
-from pydantic import SecretStr
+from pydantic import HttpUrl, SecretStr

 from llama_stack.log import get_logger
 from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
@@ -23,7 +23,7 @@ log = get_logger(name=__name__, category="inference::tgi")


 class _HfAdapter(OpenAIMixin):
-    url: str
+    base_url: HttpUrl
     api_key: SecretStr

     hf_client: AsyncInferenceClient
@@ -36,7 +36,7 @@ class _HfAdapter(OpenAIMixin):
         return "NO KEY REQUIRED"

     def get_base_url(self):
-        return self.url
+        return self.base_url

     async def list_provider_model_ids(self) -> Iterable[str]:
         return [self.model_id]
@@ -50,14 +50,20 @@ class _HfAdapter(OpenAIMixin):

 class TGIAdapter(_HfAdapter):
     async def initialize(self, config: TGIImplConfig) -> None:
-        if not config.url:
+        if not config.base_url:
             raise ValueError("You must provide a URL in run.yaml (or via the TGI_URL environment variable) to use TGI.")
-        log.info(f"Initializing TGI client with url={config.url}")
-        self.hf_client = AsyncInferenceClient(model=config.url, provider="hf-inference")
+        log.info(f"Initializing TGI client with url={config.base_url}")
+        # Extract base URL without /v1 for HF client initialization
+        base_url_str = str(config.base_url).rstrip("/")
+        if base_url_str.endswith("/v1"):
+            base_url_for_client = base_url_str[:-3]
+        else:
+            base_url_for_client = base_url_str
+        self.hf_client = AsyncInferenceClient(model=base_url_for_client, provider="hf-inference")
         endpoint_info = await self.hf_client.get_endpoint_info()
         self.max_tokens = endpoint_info["max_total_tokens"]
         self.model_id = endpoint_info["model_id"]
-        self.url = f"{config.url.rstrip('/')}/v1"
+        self.base_url = config.base_url
         self.api_key = SecretStr("NO_KEY")
@@ -6,7 +6,7 @@

 from typing import Any

-from pydantic import Field
+from pydantic import Field, HttpUrl

 from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
 from llama_stack_api import json_schema_type
@@ -14,14 +14,14 @@ from llama_stack_api import json_schema_type

 @json_schema_type
 class TogetherImplConfig(RemoteInferenceProviderConfig):
-    url: str = Field(
-        default="https://api.together.xyz/v1",
+    base_url: HttpUrl | None = Field(
+        default=HttpUrl("https://api.together.xyz/v1"),
         description="The URL for the Together AI server",
     )

     @classmethod
     def sample_run_config(cls, **kwargs) -> dict[str, Any]:
         return {
-            "url": "https://api.together.xyz/v1",
+            "base_url": "https://api.together.xyz/v1",
             "api_key": "${env.TOGETHER_API_KEY:=}",
         }
@@ -9,7 +9,6 @@ from collections.abc import Iterable
 from typing import Any, cast

 from together import AsyncTogether  # type: ignore[import-untyped]
-from together.constants import BASE_URL  # type: ignore[import-untyped]

 from llama_stack.core.request_headers import NeedsRequestProviderData
 from llama_stack.log import get_logger
@@ -42,7 +41,7 @@ class TogetherInferenceAdapter(OpenAIMixin, NeedsRequestProviderData):
     provider_data_api_key_field: str = "together_api_key"

     def get_base_url(self):
-        return BASE_URL
+        return str(self.config.base_url)

     def _get_client(self) -> AsyncTogether:
         together_api_key = None
@@ -6,7 +6,7 @@

from pathlib import Path

from pydantic import Field, SecretStr, field_validator
from pydantic import Field, HttpUrl, SecretStr, field_validator

from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack_api import json_schema_type

@@ -14,7 +14,7 @@ from llama_stack_api import json_schema_type

@json_schema_type
class VLLMInferenceAdapterConfig(RemoteInferenceProviderConfig):
    url: str | None = Field(
    base_url: HttpUrl | None = Field(
        default=None,
        description="The URL for the vLLM model serving endpoint",
    )

@@ -48,11 +48,11 @@ class VLLMInferenceAdapterConfig(RemoteInferenceProviderConfig):
    @classmethod
    def sample_run_config(
        cls,
        url: str = "${env.VLLM_URL:=}",
        base_url: str = "${env.VLLM_URL:=}",
        **kwargs,
    ):
        return {
            "url": url,
            "base_url": base_url,
            "max_tokens": "${env.VLLM_MAX_TOKENS:=4096}",
            "api_token": "${env.VLLM_API_TOKEN:=fake}",
            "tls_verify": "${env.VLLM_TLS_VERIFY:=true}",

@@ -39,12 +39,12 @@ class VLLMInferenceAdapter(OpenAIMixin):

    def get_base_url(self) -> str:
        """Get the base URL from config."""
        if not self.config.url:
        if not self.config.base_url:
            raise ValueError("No base URL configured")
        return self.config.url
        return str(self.config.base_url)

    async def initialize(self) -> None:
        if not self.config.url:
        if not self.config.base_url:
            raise ValueError(
                "You must provide a URL in run.yaml (or via the VLLM_URL environment variable) to use vLLM."
            )
@@ -7,7 +7,7 @@
import os
from typing import Any

from pydantic import BaseModel, Field
from pydantic import BaseModel, Field, HttpUrl

from llama_stack.providers.utils.inference.model_registry import RemoteInferenceProviderConfig
from llama_stack_api import json_schema_type

@@ -23,7 +23,7 @@ class WatsonXProviderDataValidator(BaseModel):

@json_schema_type
class WatsonXConfig(RemoteInferenceProviderConfig):
    url: str = Field(
    base_url: HttpUrl | None = Field(
        default_factory=lambda: os.getenv("WATSONX_BASE_URL", "https://us-south.ml.cloud.ibm.com"),
        description="A base url for accessing the watsonx.ai",
    )

@@ -39,7 +39,7 @@ class WatsonXConfig(RemoteInferenceProviderConfig):
    @classmethod
    def sample_run_config(cls, **kwargs) -> dict[str, Any]:
        return {
            "url": "${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}",
            "base_url": "${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}",
            "api_key": "${env.WATSONX_API_KEY:=}",
            "project_id": "${env.WATSONX_PROJECT_ID:=}",
        }

@@ -255,7 +255,7 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
    )

    def get_base_url(self) -> str:
        return self.config.url
        return str(self.config.base_url)

    # Copied from OpenAIMixin
    async def check_model_availability(self, model: str) -> bool:

@@ -316,7 +316,7 @@ class WatsonXInferenceAdapter(LiteLLMOpenAIMixin):
        """
        Retrieves foundation model specifications from the watsonx.ai API.
        """
        url = f"{self.config.url}/ml/v1/foundation_model_specs?version=2023-10-25"
        url = f"{str(self.config.base_url)}/ml/v1/foundation_model_specs?version=2023-10-25"
        headers = {
            # Note that there is no authorization header. Listing models does not require authentication.
            "Content-Type": "application/json",
@@ -3,23 +3,10 @@
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from collections.abc import Iterable
from typing import (
    Any,
)

from openai.types.chat import (
    ChatCompletionContentPartParam as OpenAIChatCompletionContentPartParam,
)

try:
    from openai.types.chat import (
        ChatCompletionMessageFunctionToolCall as OpenAIChatCompletionMessageFunctionToolCall,
    )
except ImportError:
    from openai.types.chat.chat_completion_message_tool_call import (
        ChatCompletionMessageToolCall as OpenAIChatCompletionMessageFunctionToolCall,
    )
from openai.types.chat import (
    ChatCompletionMessageToolCall,
)

@@ -32,18 +19,6 @@ from llama_stack.models.llama.datatypes (
    ToolCall,
    ToolDefinition,
)
from llama_stack_api import (
    URL,
    GreedySamplingStrategy,
    ImageContentItem,
    JsonSchemaResponseFormat,
    OpenAIResponseFormatParam,
    SamplingParams,
    TextContentItem,
    TopKSamplingStrategy,
    TopPSamplingStrategy,
    _URLOrData,
)

logger = get_logger(name=__name__, category="providers::utils")

@@ -73,42 +48,6 @@ class OpenAICompatCompletionResponse(BaseModel):
    choices: list[OpenAICompatCompletionChoice]


def get_sampling_strategy_options(params: SamplingParams) -> dict:
    options = {}
    if isinstance(params.strategy, GreedySamplingStrategy):
        options["temperature"] = 0.0
    elif isinstance(params.strategy, TopPSamplingStrategy):
        if params.strategy.temperature is not None:
            options["temperature"] = params.strategy.temperature
        if params.strategy.top_p is not None:
            options["top_p"] = params.strategy.top_p
    elif isinstance(params.strategy, TopKSamplingStrategy):
        options["top_k"] = params.strategy.top_k
    else:
        raise ValueError(f"Unsupported sampling strategy: {params.strategy}")

    return options


def get_sampling_options(params: SamplingParams | None) -> dict:
    if not params:
        return {}

    options = {}
    if params:
        options.update(get_sampling_strategy_options(params))
        if params.max_tokens:
            options["max_tokens"] = params.max_tokens

        if params.repetition_penalty is not None and params.repetition_penalty != 1.0:
            options["repeat_penalty"] = params.repetition_penalty

        if params.stop is not None:
            options["stop"] = params.stop

    return options


def text_from_choice(choice) -> str:
    if hasattr(choice, "delta") and choice.delta:
        return choice.delta.content  # type: ignore[no-any-return]  # external OpenAI types lack precise annotations
@@ -253,154 +192,6 @@ def convert_tooldef_to_openai_tool(tool: ToolDefinition) -> dict:
    return out


def _convert_stop_reason_to_openai_finish_reason(stop_reason: StopReason) -> str:
    """
    Convert a StopReason to an OpenAI chat completion finish_reason.
    """
    return {
        StopReason.end_of_turn: "stop",
        StopReason.end_of_message: "tool_calls",
        StopReason.out_of_tokens: "length",
    }.get(stop_reason, "stop")


def _convert_openai_finish_reason(finish_reason: str) -> StopReason:
    """
    Convert an OpenAI chat completion finish_reason to a StopReason.

    finish_reason: Literal["stop", "length", "tool_calls", ...]
        - stop: model hit a natural stop point or a provided stop sequence
        - length: maximum number of tokens specified in the request was reached
        - tool_calls: model called a tool

    ->

    class StopReason(Enum):
        end_of_turn = "end_of_turn"
        end_of_message = "end_of_message"
        out_of_tokens = "out_of_tokens"
    """

    # TODO(mf): are end_of_turn and end_of_message semantics correct?
    return {
        "stop": StopReason.end_of_turn,
        "length": StopReason.out_of_tokens,
        "tool_calls": StopReason.end_of_message,
    }.get(finish_reason, StopReason.end_of_turn)


def _convert_openai_request_tools(tools: list[dict[str, Any]] | None = None) -> list[ToolDefinition]:
    lls_tools: list[ToolDefinition] = []
    if not tools:
        return lls_tools

    for tool in tools:
        tool_fn = tool.get("function", {})
        tool_name = tool_fn.get("name", None)
        tool_desc = tool_fn.get("description", None)
        tool_params = tool_fn.get("parameters", None)

        lls_tool = ToolDefinition(
            tool_name=tool_name,
            description=tool_desc,
            input_schema=tool_params,  # Pass through entire JSON Schema
        )
        lls_tools.append(lls_tool)
    return lls_tools


def _convert_openai_request_response_format(
    response_format: OpenAIResponseFormatParam | None = None,
):
    if not response_format:
        return None
    # response_format can be a dict or a pydantic model
    response_format_dict = dict(response_format)  # type: ignore[arg-type]  # OpenAIResponseFormatParam union needs dict conversion
    if response_format_dict.get("type", "") == "json_schema":
        return JsonSchemaResponseFormat(
            type="json_schema",  # type: ignore[arg-type]  # Literal["json_schema"] incompatible with expected type
            json_schema=response_format_dict.get("json_schema", {}).get("schema", ""),
        )
    return None


def _convert_openai_tool_calls(
    tool_calls: list[OpenAIChatCompletionMessageFunctionToolCall],
) -> list[ToolCall]:
    """
    Convert an OpenAI ChatCompletionMessageToolCall list into a list of ToolCall.

    OpenAI ChatCompletionMessageToolCall:
        id: str
        function: Function
        type: Literal["function"]

    OpenAI Function:
        arguments: str
        name: str

    ->

    ToolCall:
        call_id: str
        tool_name: str
        arguments: Dict[str, ...]
    """
    if not tool_calls:
        return []  # CompletionMessage tool_calls is not optional

    return [
        ToolCall(
            call_id=call.id,
            tool_name=call.function.name,
            arguments=call.function.arguments,
        )
        for call in tool_calls
    ]


def _convert_openai_sampling_params(
    max_tokens: int | None = None,
    temperature: float | None = None,
    top_p: float | None = None,
) -> SamplingParams:
    sampling_params = SamplingParams()

    if max_tokens:
        sampling_params.max_tokens = max_tokens

    # Map an explicit temperature of 0 to greedy sampling
    if temperature == 0:
        sampling_params.strategy = GreedySamplingStrategy()
    else:
        # OpenAI defaults to 1.0 for temperature and top_p if unset
        if temperature is None:
            temperature = 1.0
        if top_p is None:
            top_p = 1.0
        sampling_params.strategy = TopPSamplingStrategy(temperature=temperature, top_p=top_p)  # type: ignore[assignment]  # SamplingParams.strategy union accepts this type

    return sampling_params


def openai_content_to_content(content: str | Iterable[OpenAIChatCompletionContentPartParam] | None):
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    elif isinstance(content, list):
        return [openai_content_to_content(c) for c in content]
    elif hasattr(content, "type"):
        if content.type == "text":
            return TextContentItem(type="text", text=content.text)  # type: ignore[attr-defined]  # Iterable narrowed by hasattr check but mypy doesn't track
        elif content.type == "image_url":
            return ImageContentItem(type="image", image=_URLOrData(url=URL(uri=content.image_url.url)))  # type: ignore[attr-defined]  # Iterable narrowed by hasattr check but mypy doesn't track
        else:
            raise ValueError(f"Unknown content type: {content.type}")
    else:
        raise ValueError(f"Unknown content type: {content}")


async def prepare_openai_completion_params(**params):
    async def _prepare_value(value: Any) -> Any:
        new_value = value
@@ -213,6 +213,19 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):

        return api_key

    def _validate_model_allowed(self, provider_model_id: str) -> None:
        """
        Validate that the model is in the allowed_models list if configured.

        :param provider_model_id: The provider-specific model ID to validate
        :raises ValueError: If the model is not in the allowed_models list
        """
        if self.config.allowed_models is not None and provider_model_id not in self.config.allowed_models:
            raise ValueError(
                f"Model '{provider_model_id}' is not in the allowed models list. "
                f"Allowed models: {self.config.allowed_models}"
            )

    async def _get_provider_model_id(self, model: str) -> str:
        """
        Get the provider-specific model ID from the model store.

@@ -259,8 +272,11 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
        Direct OpenAI completion API call.
        """
        # TODO: fix openai_completion to return type compatible with OpenAI's API response
        provider_model_id = await self._get_provider_model_id(params.model)
        self._validate_model_allowed(provider_model_id)

        completion_kwargs = await prepare_openai_completion_params(
            model=await self._get_provider_model_id(params.model),
            model=provider_model_id,
            prompt=params.prompt,
            best_of=params.best_of,
            echo=params.echo,

@@ -292,6 +308,9 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
        """
        Direct OpenAI chat completion API call.
        """
        provider_model_id = await self._get_provider_model_id(params.model)
        self._validate_model_allowed(provider_model_id)

        messages = params.messages

        if self.download_images:

@@ -313,7 +332,7 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
        messages = [await _localize_image_url(m) for m in messages]

        request_params = await prepare_openai_completion_params(
            model=await self._get_provider_model_id(params.model),
            model=provider_model_id,
            messages=messages,
            frequency_penalty=params.frequency_penalty,
            function_call=params.function_call,

@@ -351,10 +370,13 @@ class OpenAIMixin(NeedsRequestProviderData, ABC, BaseModel):
        """
        Direct OpenAI embeddings API call.
        """
        provider_model_id = await self._get_provider_model_id(params.model)
        self._validate_model_allowed(provider_model_id)

        # Build request params conditionally to avoid NotGiven/Omit type mismatch
        # The OpenAI SDK uses Omit in signatures but NOT_GIVEN has type NotGiven
        request_params: dict[str, Any] = {
            "model": await self._get_provider_model_id(params.model),
            "model": provider_model_id,
            "input": params.input,
        }
        if params.encoding_format is not None:
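Since the same `_validate_model_allowed` guard now runs before completions, chat completions, and embeddings, a minimal standalone sketch of its behavior may help (the free function below is an illustration mirroring the mixin method, not the method itself):

```python
def validate_model_allowed(allowed_models: list[str] | None, provider_model_id: str) -> None:
    """Mirror of the guard above: reject models outside an optional allow-list."""
    if allowed_models is not None and provider_model_id not in allowed_models:
        raise ValueError(
            f"Model '{provider_model_id}' is not in the allowed models list. "
            f"Allowed models: {allowed_models}"
        )


validate_model_allowed(None, "gpt-4o")        # no allow-list configured: passes
validate_model_allowed(["gpt-4o"], "gpt-4o")  # listed: passes
# validate_model_allowed(["gpt-4o"], "gpt-3.5-turbo")  # would raise ValueError
```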
@@ -11,7 +11,7 @@
from typing import Annotated, Any, Literal, Protocol, runtime_checkable

from fastapi import Body, Query
from pydantic import BaseModel, Field
from pydantic import BaseModel, Field, field_validator

from llama_stack_api.common.tracing import telemetry_traceable
from llama_stack_api.inference import InterleavedContent

@@ -372,6 +372,65 @@ VectorStoreFileStatus = Literal["completed"] | Literal["in_progress"] | Literal[
register_schema(VectorStoreFileStatus, name="VectorStoreFileStatus")


# VectorStoreFileAttributes type with OpenAPI constraints
VectorStoreFileAttributes = Annotated[
    dict[str, Annotated[str, Field(max_length=512)] | float | bool],
    Field(
        max_length=16,
        json_schema_extra={
            "propertyNames": {"type": "string", "maxLength": 64},
            "x-oaiTypeLabel": "map",
        },
        description=(
            "Set of 16 key-value pairs that can be attached to an object. This can be "
            "useful for storing additional information about the object in a structured "
            "format, and querying for objects via API or the dashboard. Keys are strings "
            "with a maximum length of 64 characters. Values are strings with a maximum "
            "length of 512 characters, booleans, or numbers."
        ),
    ),
]


def _sanitize_vector_store_attributes(metadata: dict[str, Any] | None) -> dict[str, str | float | bool]:
    """
    Sanitize metadata to VectorStoreFileAttributes spec (max 16 properties, primitives only).

    Converts dict[str, Any] to dict[str, str | float | bool]:
    - Preserves: str (truncated to 512 chars), bool, int/float (as float)
    - Converts: list -> comma-separated string
    - Filters: dict, None, other types
    - Enforces: max 16 properties, max 64 char keys, max 512 char string values
    """
    if not metadata:
        return {}

    sanitized: dict[str, str | float | bool] = {}
    for key, value in metadata.items():
        # Enforce max 16 properties
        if len(sanitized) >= 16:
            break

        # Enforce max 64 char keys
        if len(key) > 64:
            continue

        # Convert to supported primitive types
        if isinstance(value, bool):
            sanitized[key] = value
        elif isinstance(value, int | float):
            sanitized[key] = float(value)
        elif isinstance(value, str):
            # Enforce max 512 char string values
            sanitized[key] = value[:512] if len(value) > 512 else value
        elif isinstance(value, list):
            # Convert lists to comma-separated strings (max 512 chars)
            list_str = ", ".join(str(item) for item in value)
            sanitized[key] = list_str[:512] if len(list_str) > 512 else list_str

    return sanitized


@json_schema_type
class VectorStoreFileObject(BaseModel):
    """OpenAI Vector Store File object.

@@ -389,7 +448,7 @@ class VectorStoreFileObject(BaseModel):

    id: str
    object: str = "vector_store.file"
    attributes: dict[str, Any] = Field(default_factory=dict)
    attributes: VectorStoreFileAttributes = Field(default_factory=dict)
    chunking_strategy: VectorStoreChunkingStrategy
    created_at: int
    last_error: VectorStoreFileLastError | None = None

@@ -397,6 +456,12 @@ class VectorStoreFileObject(BaseModel):
    usage_bytes: int = 0
    vector_store_id: str

    @field_validator("attributes", mode="before")
    @classmethod
    def _validate_attributes(cls, v: dict[str, Any] | None) -> dict[str, str | float | bool]:
        """Sanitize attributes to match VectorStoreFileAttributes OpenAPI spec."""
        return _sanitize_vector_store_attributes(v)


@json_schema_type
class VectorStoreListFilesResponse(BaseModel):
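To make the sanitization rules above concrete, a small illustrative example (the input dict is hypothetical; the function is the `_sanitize_vector_store_attributes` defined in this diff):

```python
raw = {
    "source": "s3://bucket/report.pdf",  # str: preserved (truncated past 512 chars)
    "page_count": 42,                    # int: converted to float
    "reviewed": True,                    # bool: preserved
    "tags": ["finance", "q3"],           # list: joined to "finance, q3"
    "nested": {"a": 1},                  # dict: filtered out
}
# _sanitize_vector_store_attributes(raw) would return:
# {"source": "s3://bucket/report.pdf", "page_count": 42.0,
#  "reviewed": True, "tags": "finance, q3"}
```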
@@ -211,3 +211,23 @@ def test_asymmetric_embeddings(llama_stack_client, embedding_model_id):

    assert query_response.embeddings is not None
```

## TypeScript Client Replays

TypeScript SDK tests can run alongside Python tests when testing against `server:<config>` stacks. Set `TS_CLIENT_PATH` to the path or version of `llama-stack-client-typescript` to enable:

```bash
# Use published npm package (responses suite)
TS_CLIENT_PATH=^0.3.2 scripts/integration-tests.sh --stack-config server:ci-tests --suite responses --setup gpt

# Use local checkout from ~/.cache (recommended for development)
git clone https://github.com/llamastack/llama-stack-client-typescript.git ~/.cache/llama-stack-client-typescript
TS_CLIENT_PATH=~/.cache/llama-stack-client-typescript scripts/integration-tests.sh --stack-config server:ci-tests --suite responses --setup gpt

# Run base suite with TypeScript tests
TS_CLIENT_PATH=~/.cache/llama-stack-client-typescript scripts/integration-tests.sh --stack-config server:ci-tests --suite base --setup ollama
```

TypeScript tests run immediately after Python tests pass, using the same replay fixtures. The mapping between Python suites/setups and TypeScript test files is defined in `tests/integration/client-typescript/suites.json`, shown below.

If `TS_CLIENT_PATH` is unset, TypeScript tests are skipped entirely.
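For reference, the `suites.json` added by this commit maps each suite/setup pair to the Jest files to run:

```json
[
  {"suite": "responses", "setup": "gpt", "files": ["__tests__/responses.test.ts"]},
  {"suite": "base", "setup": "ollama", "files": ["__tests__/inference.test.ts"]}
]
```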
@@ -516,169 +516,3 @@ def test_response_with_instructions(openai_client, client_with_models, text_mode

    # Verify instructions from previous response was not carried over to the next response
    assert response_with_instructions2.instructions == instructions2


@pytest.mark.skip(reason="Tool calling is not reliable.")
def test_max_tool_calls_with_function_tools(openai_client, client_with_models, text_model_id):
    """Test handling of max_tool_calls with function tools in responses."""
    if isinstance(client_with_models, LlamaStackAsLibraryClient):
        pytest.skip("OpenAI responses are not supported when testing with library client yet.")

    client = openai_client
    max_tool_calls = 1

    tools = [
        {
            "type": "function",
            "name": "get_weather",
            "description": "Get weather information for a specified location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city name (e.g., 'New York', 'London')",
                    },
                },
            },
        },
        {
            "type": "function",
            "name": "get_time",
            "description": "Get current time for a specified location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city name (e.g., 'New York', 'London')",
                    },
                },
            },
        },
    ]

    # First create a response that triggers function tools
    response = client.responses.create(
        model=text_model_id,
        input="Can you tell me the weather in Paris and the current time?",
        tools=tools,
        stream=False,
        max_tool_calls=max_tool_calls,
    )

    # Verify we got two function calls and that the max_tool_calls do not affect function tools
    assert len(response.output) == 2
    assert response.output[0].type == "function_call"
    assert response.output[0].name == "get_weather"
    assert response.output[0].status == "completed"
    assert response.output[1].type == "function_call"
    assert response.output[1].name == "get_time"
    assert response.output[0].status == "completed"

    # Verify we have a valid max_tool_calls field
    assert response.max_tool_calls == max_tool_calls


def test_max_tool_calls_invalid(openai_client, client_with_models, text_model_id):
    """Test handling of invalid max_tool_calls in responses."""
    if isinstance(client_with_models, LlamaStackAsLibraryClient):
        pytest.skip("OpenAI responses are not supported when testing with library client yet.")

    client = openai_client

    input = "Search for today's top technology news."
    invalid_max_tool_calls = 0
    tools = [
        {"type": "web_search"},
    ]

    # Create a response with an invalid max_tool_calls value i.e. 0
    # Handle ValueError from LLS and BadRequestError from OpenAI client
    with pytest.raises((ValueError, BadRequestError)) as excinfo:
        client.responses.create(
            model=text_model_id,
            input=input,
            tools=tools,
            stream=False,
            max_tool_calls=invalid_max_tool_calls,
        )

    error_message = str(excinfo.value)
    assert f"Invalid max_tool_calls={invalid_max_tool_calls}; should be >= 1" in error_message, (
        f"Expected error message about invalid max_tool_calls, got: {error_message}"
    )


def test_max_tool_calls_with_builtin_tools(openai_client, client_with_models, text_model_id):
    """Test handling of max_tool_calls with built-in tools in responses."""
    if isinstance(client_with_models, LlamaStackAsLibraryClient):
        pytest.skip("OpenAI responses are not supported when testing with library client yet.")

    client = openai_client

    input = "Search for today's top technology and a positive news story. You MUST make exactly two separate web search calls."
    max_tool_calls = [1, 5]
    tools = [
        {"type": "web_search"},
    ]

    # First create a response that triggers web_search tools without max_tool_calls
    response = client.responses.create(
        model=text_model_id,
        input=input,
        tools=tools,
        stream=False,
    )

    # Verify we got two web search calls followed by a message
    assert len(response.output) == 3
    assert response.output[0].type == "web_search_call"
    assert response.output[0].status == "completed"
    assert response.output[1].type == "web_search_call"
    assert response.output[1].status == "completed"
    assert response.output[2].type == "message"
    assert response.output[2].status == "completed"
    assert response.output[2].role == "assistant"

    # Next create a response that triggers web_search tools with max_tool_calls set to 1
    response_2 = client.responses.create(
        model=text_model_id,
        input=input,
        tools=tools,
        stream=False,
        max_tool_calls=max_tool_calls[0],
    )

    # Verify we got one web search tool call followed by a message
    assert len(response_2.output) == 2
    assert response_2.output[0].type == "web_search_call"
    assert response_2.output[0].status == "completed"
    assert response_2.output[1].type == "message"
    assert response_2.output[1].status == "completed"
    assert response_2.output[1].role == "assistant"

    # Verify we have a valid max_tool_calls field
    assert response_2.max_tool_calls == max_tool_calls[0]

    # Finally create a response that triggers web_search tools with max_tool_calls set to 5
    response_3 = client.responses.create(
        model=text_model_id,
        input=input,
        tools=tools,
        stream=False,
        max_tool_calls=max_tool_calls[1],
    )

    # Verify we got two web search calls followed by a message
    assert len(response_3.output) == 3
    assert response_3.output[0].type == "web_search_call"
    assert response_3.output[0].status == "completed"
    assert response_3.output[1].type == "web_search_call"
    assert response_3.output[1].status == "completed"
    assert response_3.output[2].type == "message"
    assert response_3.output[2].status == "completed"
    assert response_3.output[2].role == "assistant"

    # Verify we have a valid max_tool_calls field
    assert response_3.max_tool_calls == max_tool_calls[1]
104  tests/integration/client-typescript/__tests__/inference.test.ts  Normal file

@@ -0,0 +1,104 @@
// Copyright (c) Meta Platforms, Inc. and affiliates.
// All rights reserved.
//
// This source code is licensed under the terms described in the LICENSE file in
// the root directory of this source tree.

/**
 * Integration tests for Inference API (Chat Completions).
 * Ported from: llama-stack/tests/integration/inference/test_openai_completion.py
 *
 * IMPORTANT: Test cases must match EXACTLY with Python tests to use recorded API responses.
 */

import { createTestClient, requireTextModel } from '../setup';

describe('Inference API - Chat Completions', () => {
  // Test cases matching llama-stack/tests/integration/test_cases/inference/chat_completion.json
  const chatCompletionTestCases = [
    {
      id: 'non_streaming_01',
      question: 'Which planet do humans live on?',
      expected: 'earth',
      testId:
        'tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[client_with_models-txt=ollama/llama3.2:3b-instruct-fp16-inference:chat_completion:non_streaming_01]',
    },
    {
      id: 'non_streaming_02',
      question: 'Which planet has rings around it with a name starting with letter S?',
      expected: 'saturn',
      testId:
        'tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_non_streaming[client_with_models-txt=ollama/llama3.2:3b-instruct-fp16-inference:chat_completion:non_streaming_02]',
    },
  ];

  const streamingTestCases = [
    {
      id: 'streaming_01',
      question: "What's the name of the Sun in latin?",
      expected: 'sol',
      testId:
        'tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=ollama/llama3.2:3b-instruct-fp16-inference:chat_completion:streaming_01]',
    },
    {
      id: 'streaming_02',
      question: 'What is the name of the US captial?',
      expected: 'washington',
      testId:
        'tests/integration/inference/test_openai_completion.py::test_openai_chat_completion_streaming[client_with_models-txt=ollama/llama3.2:3b-instruct-fp16-inference:chat_completion:streaming_02]',
    },
  ];

  test.each(chatCompletionTestCases)(
    'chat completion non-streaming: $id',
    async ({ question, expected, testId }) => {
      const client = createTestClient(testId);
      const textModel = requireTextModel();

      const response = await client.chat.completions.create({
        model: textModel,
        messages: [
          {
            role: 'user',
            content: question,
          },
        ],
        stream: false,
      });

      // Non-streaming responses have choices with message property
      const choice = response.choices[0];
      expect(choice).toBeDefined();
      if (!choice || !('message' in choice)) {
        throw new Error('Expected non-streaming response with message');
      }
      const content = choice.message.content;
      expect(content).toBeDefined();
      const messageContent = typeof content === 'string' ? content.toLowerCase().trim() : '';
      expect(messageContent.length).toBeGreaterThan(0);
      expect(messageContent).toContain(expected.toLowerCase());
    },
  );

  test.each(streamingTestCases)('chat completion streaming: $id', async ({ question, expected, testId }) => {
    const client = createTestClient(testId);
    const textModel = requireTextModel();

    const stream = await client.chat.completions.create({
      model: textModel,
      messages: [{ role: 'user', content: question }],
      stream: true,
    });

    const streamedContent: string[] = [];
    for await (const chunk of stream) {
      if (chunk.choices && chunk.choices.length > 0 && chunk.choices[0]?.delta?.content) {
        streamedContent.push(chunk.choices[0].delta.content);
      }
    }

    expect(streamedContent.length).toBeGreaterThan(0);
    const fullContent = streamedContent.join('').toLowerCase().trim();
    expect(fullContent).toContain(expected.toLowerCase());
  });
});
132  tests/integration/client-typescript/__tests__/responses.test.ts  Normal file

@@ -0,0 +1,132 @@
// Copyright (c) Meta Platforms, Inc. and affiliates.
// All rights reserved.
//
// This source code is licensed under the terms described in the LICENSE file in
// the root directory of this source tree.

/**
 * Integration tests for Responses API.
 * Ported from: llama-stack/tests/integration/responses/test_basic_responses.py
 *
 * IMPORTANT: Test cases and IDs must match EXACTLY with Python tests to use recorded API responses.
 */

import { createTestClient, requireTextModel, getResponseOutputText } from '../setup';

describe('Responses API - Basic', () => {
  // Test cases matching llama-stack/tests/integration/responses/fixtures/test_cases.py
  const basicTestCases = [
    {
      id: 'earth',
      input: 'Which planet do humans live on?',
      expected: 'earth',
      // Use client_with_models fixture to match non-streaming recordings
      testId:
        'tests/integration/responses/test_basic_responses.py::test_response_non_streaming_basic[client_with_models-txt=openai/gpt-4o-earth]',
    },
    {
      id: 'saturn',
      input: 'Which planet has rings around it with a name starting with letter S?',
      expected: 'saturn',
      testId:
        'tests/integration/responses/test_basic_responses.py::test_response_non_streaming_basic[client_with_models-txt=openai/gpt-4o-saturn]',
    },
  ];

  test.each(basicTestCases)('non-streaming basic response: $id', async ({ input, expected, testId }) => {
    // Create client with test_id for all requests
    const client = createTestClient(testId);
    const textModel = requireTextModel();

    // Create a response
    const response = await client.responses.create({
      model: textModel,
      input,
      stream: false,
    });

    // Verify response has content
    const outputText = getResponseOutputText(response).toLowerCase().trim();
    expect(outputText.length).toBeGreaterThan(0);
    expect(outputText).toContain(expected.toLowerCase());

    // Verify usage is reported
    expect(response.usage).toBeDefined();
    expect(response.usage!.input_tokens).toBeGreaterThan(0);
    expect(response.usage!.output_tokens).toBeGreaterThan(0);
    expect(response.usage!.total_tokens).toBe(response.usage!.input_tokens + response.usage!.output_tokens);

    // Verify stored response matches
    const retrievedResponse = await client.responses.retrieve(response.id);
    expect(getResponseOutputText(retrievedResponse)).toBe(getResponseOutputText(response));

    // Test follow-up with previous_response_id
    const nextResponse = await client.responses.create({
      model: textModel,
      input: 'Repeat your previous response in all caps.',
      previous_response_id: response.id,
    });
    const nextOutputText = getResponseOutputText(nextResponse).trim();
    expect(nextOutputText).toContain(expected.toUpperCase());
  });

  test.each(basicTestCases)('streaming basic response: $id', async ({ input, expected, testId }) => {
    // Modify test_id for streaming variant
    const streamingTestId = testId.replace(
      'test_response_non_streaming_basic',
      'test_response_streaming_basic',
    );
    const client = createTestClient(streamingTestId);
    const textModel = requireTextModel();

    // Create a streaming response
    const stream = await client.responses.create({
      model: textModel,
      input,
      stream: true,
    });

    const events: any[] = [];
    let responseId = '';

    for await (const chunk of stream) {
      events.push(chunk);

      if (chunk.type === 'response.created') {
        // Verify response.created is the first event
        expect(events.length).toBe(1);
        expect(chunk.response.status).toBe('in_progress');
        responseId = chunk.response.id;
      } else if (chunk.type === 'response.completed') {
        // Verify response.completed comes after response.created
        expect(events.length).toBeGreaterThanOrEqual(2);
        expect(chunk.response.status).toBe('completed');
        expect(chunk.response.id).toBe(responseId);

        // Verify content quality
        const outputText = getResponseOutputText(chunk.response).toLowerCase().trim();
        expect(outputText.length).toBeGreaterThan(0);
        expect(outputText).toContain(expected.toLowerCase());

        // Verify usage is reported
        expect(chunk.response.usage).toBeDefined();
        expect(chunk.response.usage!.input_tokens).toBeGreaterThan(0);
        expect(chunk.response.usage!.output_tokens).toBeGreaterThan(0);
        expect(chunk.response.usage!.total_tokens).toBe(
          chunk.response.usage!.input_tokens + chunk.response.usage!.output_tokens,
        );
      }
    }

    // Verify we got both events
    expect(events.length).toBeGreaterThanOrEqual(2);
    const firstEvent = events[0];
    const lastEvent = events[events.length - 1];
    expect(firstEvent.type).toBe('response.created');
    expect(lastEvent.type).toBe('response.completed');

    // Verify stored response matches streamed response
    const retrievedResponse = await client.responses.retrieve(responseId);
    expect(getResponseOutputText(retrievedResponse)).toBe(getResponseOutputText(lastEvent.response));
  });
});
@@ -0,0 +1,31 @@
// Copyright (c) Meta Platforms, Inc. and affiliates.
// All rights reserved.
//
// This source code is licensed under the terms described in the LICENSE file in
// the root directory of this source tree.

/** @type {import('ts-jest').JestConfigWithTsJest} */
module.exports = {
  preset: 'ts-jest/presets/default-esm',
  testEnvironment: 'node',
  extensionsToTreatAsEsm: ['.ts'],
  moduleNameMapper: {
    '^(\\.{1,2}/.*)\\.js$': '$1',
  },
  transform: {
    '^.+\\.tsx?$': [
      'ts-jest',
      {
        useESM: true,
        tsconfig: {
          module: 'ES2022',
          moduleResolution: 'bundler',
        },
      },
    ],
  },
  testMatch: ['<rootDir>/__tests__/**/*.test.ts'],
  setupFilesAfterEnv: ['<rootDir>/setup.ts'],
  testTimeout: 60000, // 60 seconds (integration tests can be slow)
  watchman: false, // Disable watchman to avoid permission issues
};
5507  tests/integration/client-typescript/package-lock.json  generated  Normal file
File diff suppressed because it is too large

18  tests/integration/client-typescript/package.json  Normal file

@@ -0,0 +1,18 @@
{
  "name": "llama-stack-typescript-integration-tests",
  "version": "0.0.1",
  "private": true,
  "description": "TypeScript client integration tests for Llama Stack",
  "scripts": {
    "test": "node run-tests.js"
  },
  "devDependencies": {
    "@swc/core": "^1.3.102",
    "@swc/jest": "^0.2.29",
    "@types/jest": "^29.4.0",
    "@types/node": "^20.0.0",
    "jest": "^29.4.0",
    "ts-jest": "^29.1.0",
    "typescript": "^5.0.0"
  }
}
63  tests/integration/client-typescript/run-tests.js  Executable file

@@ -0,0 +1,63 @@
#!/usr/bin/env node
// Copyright (c) Meta Platforms, Inc. and affiliates.
// All rights reserved.
//
// This source code is licensed under the terms described in the LICENSE file in
// the root directory of this source tree.

/**
 * Test runner that finds and executes TypeScript tests based on suite/setup mapping.
 * Called by integration-tests.sh via npm test.
 */

const fs = require('fs');
const path = require('path');
const { execSync } = require('child_process');

const suite = process.env.LLAMA_STACK_TEST_SUITE;
const setup = process.env.LLAMA_STACK_TEST_SETUP || '';

if (!suite) {
  console.error('Error: LLAMA_STACK_TEST_SUITE environment variable is required');
  process.exit(1);
}

// Read suites.json to find matching test files
const suitesPath = path.join(__dirname, 'suites.json');
if (!fs.existsSync(suitesPath)) {
  console.log(`No TypeScript tests configured (${suitesPath} not found)`);
  process.exit(0);
}

const suites = JSON.parse(fs.readFileSync(suitesPath, 'utf-8'));

// Find matching entry
let testFiles = [];
for (const entry of suites) {
  if (entry.suite !== suite) {
    continue;
  }
  const entrySetup = entry.setup || '';
  if (entrySetup && entrySetup !== setup) {
    continue;
  }
  testFiles = entry.files || [];
  break;
}

if (testFiles.length === 0) {
  console.log(`No TypeScript integration tests mapped for suite ${suite} (setup ${setup})`);
  process.exit(0);
}

console.log(`Running TypeScript tests for suite ${suite} (setup ${setup}): ${testFiles.join(', ')}`);

// Run Jest with the mapped test files
try {
  execSync(`npx jest --config jest.integration.config.js ${testFiles.join(' ')}`, {
    stdio: 'inherit',
    cwd: __dirname,
  });
} catch (error) {
  process.exit(error.status || 1);
}
162  tests/integration/client-typescript/setup.ts  Normal file

@@ -0,0 +1,162 @@
// Copyright (c) Meta Platforms, Inc. and affiliates.
// All rights reserved.
//
// This source code is licensed under the terms described in the LICENSE file in
// the root directory of this source tree.

/**
 * Global setup for integration tests.
 * This file mimics pytest's fixture system by providing shared test configuration.
 */

import LlamaStackClient from 'llama-stack-client';

/**
 * Load test configuration from the Python setup system.
 * This reads setup definitions from tests/integration/suites.py via get_setup_env.py.
 */
function loadTestConfig() {
  const baseURL = process.env['TEST_API_BASE_URL'];
  const setupName = process.env['LLAMA_STACK_TEST_SETUP'];
  const textModel = process.env['LLAMA_STACK_TEST_TEXT_MODEL'];
  const embeddingModel = process.env['LLAMA_STACK_TEST_EMBEDDING_MODEL'];

  if (!baseURL) {
    throw new Error(
      'TEST_API_BASE_URL is required for integration tests. ' +
        'Run tests using: ./scripts/integration-tests.sh',
    );
  }

  return {
    baseURL,
    textModel,
    embeddingModel,
    setupName,
  };
}

// Read configuration from environment variables (set by scripts/integration-tests.sh)
export const TEST_CONFIG = loadTestConfig();

// Validate required configuration
beforeAll(() => {
  console.log('\n=== Integration Test Configuration ===');
  console.log(`Base URL: ${TEST_CONFIG.baseURL}`);
  console.log(`Setup: ${TEST_CONFIG.setupName || 'NOT SET'}`);
  console.log(
    `Text Model: ${TEST_CONFIG.textModel || 'NOT SET - tests requiring text model will be skipped'}`,
  );
  console.log(
    `Embedding Model: ${
      TEST_CONFIG.embeddingModel || 'NOT SET - tests requiring embedding model will be skipped'
    }`,
  );
  console.log('=====================================\n');
});

/**
 * Create a client instance for integration tests.
 * Mimics pytest's `llama_stack_client` fixture.
 *
 * @param testId - Test ID to send in X-LlamaStack-Provider-Data header for replay mode.
 *   Format: "tests/integration/responses/test_basic_responses.py::test_name[params]"
 */
export function createTestClient(testId?: string): LlamaStackClient {
  const headers: Record<string, string> = {};

  // In server mode with replay, send test ID for recording isolation
  if (process.env['LLAMA_STACK_TEST_STACK_CONFIG_TYPE'] === 'server' && testId) {
    headers['X-LlamaStack-Provider-Data'] = JSON.stringify({
      __test_id: testId,
    });
  }

  return new LlamaStackClient({
    baseURL: TEST_CONFIG.baseURL,
    timeout: 60000, // 60 seconds
    defaultHeaders: headers,
  });
}

/**
 * Skip test if required model is not configured.
 * Mimics pytest's `skip_if_no_model` autouse fixture.
 */
export function skipIfNoModel(modelType: 'text' | 'embedding'): typeof test {
  const model = modelType === 'text' ? TEST_CONFIG.textModel : TEST_CONFIG.embeddingModel;

  if (!model) {
    const envVar = modelType === 'text' ? 'LLAMA_STACK_TEST_TEXT_MODEL' : 'LLAMA_STACK_TEST_EMBEDDING_MODEL';
    const message = `Skipping: ${modelType} model not configured (set ${envVar})`;
    console.warn(message);
    return test.skip.bind(test) as typeof test;
  }

  return test;
}

/**
 * Get the configured text model, throwing if not set.
 * Use this in tests that absolutely require a text model.
 */
export function requireTextModel(): string {
  if (!TEST_CONFIG.textModel) {
    throw new Error(
      'LLAMA_STACK_TEST_TEXT_MODEL environment variable is required. ' +
        'Run tests using: ./scripts/integration-tests.sh',
    );
  }
  return TEST_CONFIG.textModel;
}

/**
 * Get the configured embedding model, throwing if not set.
 * Use this in tests that absolutely require an embedding model.
 */
export function requireEmbeddingModel(): string {
  if (!TEST_CONFIG.embeddingModel) {
    throw new Error(
      'LLAMA_STACK_TEST_EMBEDDING_MODEL environment variable is required. ' +
        'Run tests using: ./scripts/integration-tests.sh',
    );
  }
  return TEST_CONFIG.embeddingModel;
}

/**
 * Extracts aggregated text output from a ResponseObject.
 * This concatenates all text content from the response's output array.
 *
 * Copied from llama-stack-client's response-helpers until it's available in the published version.
 */
export function getResponseOutputText(response: any): string {
  const pieces: string[] = [];

  for (const output of response.output ?? []) {
    if (!output || output.type !== 'message') {
      continue;
    }

    const content = output.content;
    if (typeof content === 'string') {
      pieces.push(content);
      continue;
    }

    if (!Array.isArray(content)) {
      continue;
    }

    for (const item of content) {
      if (typeof item === 'string') {
        pieces.push(item);
        continue;
      }
      if (item && item.type === 'output_text' && 'text' in item && typeof item.text === 'string') {
        pieces.push(item.text);
      }
    }
  }

  return pieces.join('');
}
12  tests/integration/client-typescript/suites.json  Normal file

@@ -0,0 +1,12 @@
[
  {
    "suite": "responses",
    "setup": "gpt",
    "files": ["__tests__/responses.test.ts"]
  },
  {
    "suite": "base",
    "setup": "ollama",
    "files": ["__tests__/inference.test.ts"]
  }
]
16  tests/integration/client-typescript/tsconfig.json  Normal file

@@ -0,0 +1,16 @@
{
  "compilerOptions": {
    "target": "ES2022",
    "module": "ES2022",
    "lib": ["ES2022"],
    "moduleResolution": "bundler",
    "esModuleInterop": true,
    "allowSyntheticDefaultImports": true,
    "strict": true,
    "skipLibCheck": true,
    "resolveJsonModule": true,
    "types": ["jest", "node"]
  },
  "include": ["**/*.ts"],
  "exclude": ["node_modules"]
}
@@ -0,0 +1,773 @@
{
  "test_id": "tests/integration/responses/test_tool_responses.py::test_max_tool_calls_with_mcp_tools[client_with_models-txt=openai/gpt-4o]",
  "request": {
    "method": "POST",
    "url": "https://api.openai.com/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "gpt-4o",
      "messages": [
        {
          "role": "user",
          "content": "Get the experiment ID for 'boiling_point' and get the user ID for 'charlie'"
        }
      ],
      "stream": true,
      "stream_options": {
        "include_usage": true
      },
      "tools": [
        {
          "type": "function",
          "function": {
            "name": "get_user_id",
            "description": "\n Get the user ID for a given username. This ID is needed for other operations.\n\n :param username: The username to look up\n :return: The user ID for the username\n ",
            "parameters": {
              "properties": {"username": {"title": "Username", "type": "string"}},
              "required": ["username"],
              "title": "get_user_idArguments",
              "type": "object"
            }
          }
        },
        {
          "type": "function",
          "function": {
            "name": "get_user_permissions",
            "description": "\n Get the permissions for a user ID. Requires a valid user ID from get_user_id.\n\n :param user_id: The user ID to check permissions for\n :return: The permissions for the user\n ",
            "parameters": {
              "properties": {"user_id": {"title": "User Id", "type": "string"}},
              "required": ["user_id"],
              "title": "get_user_permissionsArguments",
              "type": "object"
            }
          }
        },
        {
          "type": "function",
          "function": {
            "name": "check_file_access",
            "description": "\n Check if a user can access a specific file. Requires a valid user ID.\n\n :param user_id: The user ID to check access for\n :param filename: The filename to check access to\n :return: Whether the user can access the file (yes/no)\n ",
            "parameters": {
              "properties": {"user_id": {"title": "User Id", "type": "string"}, "filename": {"title": "Filename", "type": "string"}},
              "required": ["user_id", "filename"],
              "title": "check_file_accessArguments",
              "type": "object"
            }
          }
        },
        {
          "type": "function",
          "function": {
            "name": "get_experiment_id",
            "description": "\n Get the experiment ID for a given experiment name. This ID is needed to get results.\n\n :param experiment_name: The name of the experiment\n :return: The experiment ID\n ",
            "parameters": {
              "properties": {"experiment_name": {"title": "Experiment Name", "type": "string"}},
              "required": ["experiment_name"],
              "title": "get_experiment_idArguments",
              "type": "object"
            }
          }
        },
        {
          "type": "function",
          "function": {
            "name": "get_experiment_results",
            "description": "\n Get the results for an experiment ID. Requires a valid experiment ID from get_experiment_id.\n\n :param experiment_id: The experiment ID to get results for\n :return: The experiment results\n ",
            "parameters": {
              "properties": {"experiment_id": {"title": "Experiment Id", "type": "string"}},
              "required": ["experiment_id"],
              "title": "get_experiment_resultsArguments",
              "type": "object"
            }
          }
        }
      ]
    },
    "endpoint": "/v1/chat/completions",
    "model": "gpt-4o"
  },
  "response": {
    "body": [
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "rec-1997dc007d20",
          "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}],
          "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk",
          "service_tier": "default", "system_fingerprint": "fp_c98e05ca17", "usage": null, "obfuscation": "1V9w3bXnppL"
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "rec-1997dc007d20",
          "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 0, "id": "call_y8S7JKR2Qhu4Bh1uxdHRcNDg", "function": {"arguments": "", "name": "get_experiment_id"}, "type": "function"}]}, "finish_reason": null, "index": 0, "logprobs": null}],
          "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk",
          "service_tier": "default", "system_fingerprint": "fp_c98e05ca17", "usage": null, "obfuscation": "YEsj"
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "rec-1997dc007d20",
          "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 0, "id": null, "function": {"arguments": "{\"ex", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}],
          "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk",
          "service_tier": "default", "system_fingerprint": "fp_c98e05ca17", "usage": null, "obfuscation": "n"
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "rec-1997dc007d20",
          "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 0, "id": null, "function": {"arguments": "perim", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}],
          "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk",
          "service_tier": "default", "system_fingerprint": "fp_c98e05ca17", "usage": null, "obfuscation": "Q"
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "rec-1997dc007d20",
          "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 0, "id": null, "function": {"arguments": "ent_na", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}],
          "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk",
          "service_tier": "default", "system_fingerprint": "fp_c98e05ca17", "usage": null, "obfuscation": ""
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "rec-1997dc007d20",
          "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 0, "id": null, "function": {"arguments": "me\":", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}],
          "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk",
          "service_tier": "default", "system_fingerprint": "fp_c98e05ca17", "usage": null, "obfuscation": "U"
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "rec-1997dc007d20",
          "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 0, "id": null, "function": {"arguments": " \"boi", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}],
          "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk",
          "service_tier": "default", "system_fingerprint": "fp_c98e05ca17", "usage": null, "obfuscation": ""
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "rec-1997dc007d20",
          "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 0, "id": null, "function": {"arguments": "ling_p", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}],
          "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk",
          "service_tier": "default", "system_fingerprint": "fp_c98e05ca17", "usage": null, "obfuscation": ""
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "rec-1997dc007d20",
          "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 0, "id": null, "function": {"arguments": "oint", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}],
          "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk",
          "service_tier": "default", "system_fingerprint": "fp_c98e05ca17", "usage": null, "obfuscation": "ha"
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "rec-1997dc007d20",
          "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 0, "id": null, "function": {"arguments": "\"}", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}],
          "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk",
          "service_tier": "default", "system_fingerprint": "fp_c98e05ca17", "usage": null, "obfuscation": "d5D"
        }
      },
      {
        "__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
        "__data__": {
          "id": "rec-1997dc007d20",
          "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 1, "id": "call_HELkyZOm2fzLx2CeTH3bEcS2", "function": {"arguments": "", "name": "get_user_id"}, "type": "function"}]}, "finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_c98e05ca17",
|
||||
"usage": null,
|
||||
"obfuscation": "0LbsjDcKz6"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-1997dc007d20",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": null,
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": [
|
||||
{
|
||||
"index": 1,
|
||||
"id": null,
|
||||
"function": {
|
||||
"arguments": "{\"us",
|
||||
"name": null
|
||||
},
|
||||
"type": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_c98e05ca17",
|
||||
"usage": null,
|
||||
"obfuscation": "c"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-1997dc007d20",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": null,
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": [
|
||||
{
|
||||
"index": 1,
|
||||
"id": null,
|
||||
"function": {
|
||||
"arguments": "ernam",
|
||||
"name": null
|
||||
},
|
||||
"type": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_c98e05ca17",
|
||||
"usage": null,
|
||||
"obfuscation": "9"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-1997dc007d20",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": null,
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": [
|
||||
{
|
||||
"index": 1,
|
||||
"id": null,
|
||||
"function": {
|
||||
"arguments": "e\": \"c",
|
||||
"name": null
|
||||
},
|
||||
"type": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_c98e05ca17",
|
||||
"usage": null,
|
||||
"obfuscation": "7C0WFn181I3y3l"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-1997dc007d20",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": null,
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": [
|
||||
{
|
||||
"index": 1,
|
||||
"id": null,
|
||||
"function": {
|
||||
"arguments": "harl",
|
||||
"name": null
|
||||
},
|
||||
"type": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_c98e05ca17",
|
||||
"usage": null,
|
||||
"obfuscation": "wf"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-1997dc007d20",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": null,
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": [
|
||||
{
|
||||
"index": 1,
|
||||
"id": null,
|
||||
"function": {
|
||||
"arguments": "ie\"}",
|
||||
"name": null
|
||||
},
|
||||
"type": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"finish_reason": null,
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_c98e05ca17",
|
||||
"usage": null,
|
||||
"obfuscation": "r"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-1997dc007d20",
|
||||
"choices": [
|
||||
{
|
||||
"delta": {
|
||||
"content": null,
|
||||
"function_call": null,
|
||||
"refusal": null,
|
||||
"role": null,
|
||||
"tool_calls": null
|
||||
},
|
||||
"finish_reason": "tool_calls",
|
||||
"index": 0,
|
||||
"logprobs": null
|
||||
}
|
||||
],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_c98e05ca17",
|
||||
"usage": null,
|
||||
"obfuscation": "FAci"
|
||||
}
|
||||
},
|
||||
{
|
||||
"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk",
|
||||
"__data__": {
|
||||
"id": "rec-1997dc007d20",
|
||||
"choices": [],
|
||||
"created": 0,
|
||||
"model": "gpt-4o-2024-08-06",
|
||||
"object": "chat.completion.chunk",
|
||||
"service_tier": "default",
|
||||
"system_fingerprint": "fp_c98e05ca17",
|
||||
"usage": {
|
||||
"completion_tokens": 51,
|
||||
"prompt_tokens": 393,
|
||||
"total_tokens": 444,
|
||||
"completion_tokens_details": {
|
||||
"accepted_prediction_tokens": 0,
|
||||
"audio_tokens": 0,
|
||||
"reasoning_tokens": 0,
|
||||
"rejected_prediction_tokens": 0
|
||||
},
|
||||
"prompt_tokens_details": {
|
||||
"audio_tokens": 0,
|
||||
"cached_tokens": 0
|
||||
}
|
||||
},
|
||||
"obfuscation": "6xgpRRdKjviPT"
|
||||
}
|
||||
}
|
||||
],
|
||||
"is_streaming": true
|
||||
},
|
||||
"id_normalization_mapping": {}
|
||||
}
|
||||
|
|
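Each tool call in these recordings arrives in pieces: the first chunk for a given `index` carries the call `id` and function `name` with empty `arguments`, and every later chunk appends one argument fragment (`{\"ex`, `perim`, `ent_na`, `me\":`, ` \"boi`, `ling_p`, `oint`, `\"}`). A client replaying the fixture has to concatenate the fragments per index before the JSON can be parsed. Below is a minimal sketch of that accumulation, assuming openai-python style chunk objects shaped like the recordings; `collect_tool_calls` is a hypothetical helper, not part of the test suite:

```python
import json


def collect_tool_calls(chunks):
    """Stitch streamed tool-call deltas back into complete calls, keyed by index."""
    calls = {}
    for chunk in chunks:
        if not chunk.choices:
            continue  # the final usage-only chunk has an empty choices list
        for tc in chunk.choices[0].delta.tool_calls or []:
            entry = calls.setdefault(tc.index, {"id": None, "name": None, "arguments": ""})
            if tc.id:
                entry["id"] = tc.id
            if tc.function and tc.function.name:
                entry["name"] = tc.function.name
            if tc.function and tc.function.arguments:
                entry["arguments"] += tc.function.arguments
    # Parse the concatenated argument fragments once each call is complete.
    return {i: {**c, "arguments": json.loads(c["arguments"])} for i, c in calls.items()}
```

Applied to the recording above, this yields `get_experiment_id` with `{"experiment_name": "boiling_point"}` at index 0 and `get_user_id` with `{"username": "charlie"}` at index 1.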
@@ -0,0 +1,593 @@
{
  "test_id": "tests/integration/responses/test_tool_responses.py::test_max_tool_calls_with_function_tools[openai_client-txt=openai/gpt-4o]",
  "request": {
    "method": "POST",
    "url": "https://api.openai.com/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "gpt-4o",
      "messages": [{"role": "user", "content": "Can you tell me the weather in Paris and the current time?"}],
      "stream": true,
      "stream_options": {"include_usage": true},
      "tools": [
        {"type": "function", "function": {"type": "function", "name": "get_weather", "description": "Get weather information for a specified location", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The city name (e.g., 'New York', 'London')"}}}, "strict": null}},
        {"type": "function", "function": {"type": "function", "name": "get_time", "description": "Get current time for a specified location", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The city name (e.g., 'New York', 'London')"}}}, "strict": null}}
      ]
    },
    "endpoint": "/v1/chat/completions",
    "model": "gpt-4o"
  },
  "response": {
    "body": [
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-463ab0e2f291", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_b1442291a8", "usage": null, "obfuscation": "QmTXstGvpa8"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-463ab0e2f291", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 0, "id": "call_HJMoLtHXfCzhlMQOfqIKt0n3", "function": {"arguments": "", "name": "get_weather"}, "type": "function"}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_b1442291a8", "usage": null, "obfuscation": "iFjmkK23KL"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-463ab0e2f291", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 0, "id": null, "function": {"arguments": "{\"lo", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_b1442291a8", "usage": null, "obfuscation": "7"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-463ab0e2f291", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 0, "id": null, "function": {"arguments": "catio", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_b1442291a8", "usage": null, "obfuscation": "L"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-463ab0e2f291", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 0, "id": null, "function": {"arguments": "n\": \"P", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_b1442291a8", "usage": null, "obfuscation": "THa6gWbrWhVmZ6"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-463ab0e2f291", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 0, "id": null, "function": {"arguments": "aris", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_b1442291a8", "usage": null, "obfuscation": "eL"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-463ab0e2f291", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 0, "id": null, "function": {"arguments": "\"}", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_b1442291a8", "usage": null, "obfuscation": "jng"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-463ab0e2f291", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 1, "id": "call_vGKvTKZM7aALMaUw3Jas7lRg", "function": {"arguments": "", "name": "get_time"}, "type": "function"}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_b1442291a8", "usage": null, "obfuscation": "LSailgMcgSl54"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-463ab0e2f291", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 1, "id": null, "function": {"arguments": "{\"lo", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_b1442291a8", "usage": null, "obfuscation": "z"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-463ab0e2f291", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 1, "id": null, "function": {"arguments": "catio", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_b1442291a8", "usage": null, "obfuscation": "4"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-463ab0e2f291", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 1, "id": null, "function": {"arguments": "n\": \"P", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_b1442291a8", "usage": null, "obfuscation": "0engr6vRvqXTEP"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-463ab0e2f291", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 1, "id": null, "function": {"arguments": "aris", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_b1442291a8", "usage": null, "obfuscation": "Pe"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-463ab0e2f291", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 1, "id": null, "function": {"arguments": "\"}", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_b1442291a8", "usage": null, "obfuscation": "LU9"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-463ab0e2f291", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": "tool_calls", "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_b1442291a8", "usage": null, "obfuscation": "kD7d"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-463ab0e2f291", "choices": [], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_b1442291a8", "usage": {"completion_tokens": 44, "prompt_tokens": 110, "total_tokens": 154, "completion_tokens_details": {"accepted_prediction_tokens": 0, "audio_tokens": 0, "reasoning_tokens": 0, "rejected_prediction_tokens": 0}, "prompt_tokens_details": {"audio_tokens": 0, "cached_tokens": 0}}, "obfuscation": "R4ICoxqTqj7ZY"}}
    ],
    "is_streaming": true
  },
  "id_normalization_mapping": {}
}
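The fixture above backs `test_max_tool_calls_with_function_tools`. Note that `max_tool_calls` itself never appears in the recorded chat-completions body: it is a Responses-API-level parameter, so the cap is presumably enforced by the Responses implementation around this upstream call rather than passed through. A hedged sketch of the kind of request that produces traffic like this recording follows; the field values are copied from the recorded body, while the client construction is illustrative and not the test's actual setup:

```python
from openai import OpenAI

client = OpenAI()  # the test suite points this at a Llama Stack server instead

tools = [
    {
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get weather information for a specified location",
            "parameters": {
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The city name (e.g., 'New York', 'London')",
                    }
                },
            },
        },
    },
    # get_time is declared the same way, with its own description.
]

stream = client.chat.completions.create(
    model="gpt-4o",
    messages=[{"role": "user", "content": "Can you tell me the weather in Paris and the current time?"}],
    tools=tools,
    stream=True,
    stream_options={"include_usage": True},  # appends the final usage-only chunk
)
```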
@@ -0,0 +1,773 @@
{
  "test_id": "tests/integration/responses/test_tool_responses.py::test_max_tool_calls_with_mcp_tools[openai_client-txt=openai/gpt-4o]",
  "request": {
    "method": "POST",
    "url": "https://api.openai.com/v1/v1/chat/completions",
    "headers": {},
    "body": {
      "model": "gpt-4o",
      "messages": [{"role": "user", "content": "Get the experiment ID for 'boiling_point' and get the user ID for 'charlie'"}],
      "stream": true,
      "stream_options": {"include_usage": true},
      "tools": [
        {"type": "function", "function": {"name": "get_user_id", "description": "\n Get the user ID for a given username. This ID is needed for other operations.\n\n :param username: The username to look up\n :return: The user ID for the username\n ", "parameters": {"properties": {"username": {"title": "Username", "type": "string"}}, "required": ["username"], "title": "get_user_idArguments", "type": "object"}}},
        {"type": "function", "function": {"name": "get_user_permissions", "description": "\n Get the permissions for a user ID. Requires a valid user ID from get_user_id.\n\n :param user_id: The user ID to check permissions for\n :return: The permissions for the user\n ", "parameters": {"properties": {"user_id": {"title": "User Id", "type": "string"}}, "required": ["user_id"], "title": "get_user_permissionsArguments", "type": "object"}}},
        {"type": "function", "function": {"name": "check_file_access", "description": "\n Check if a user can access a specific file. Requires a valid user ID.\n\n :param user_id: The user ID to check access for\n :param filename: The filename to check access to\n :return: Whether the user can access the file (yes/no)\n ", "parameters": {"properties": {"user_id": {"title": "User Id", "type": "string"}, "filename": {"title": "Filename", "type": "string"}}, "required": ["user_id", "filename"], "title": "check_file_accessArguments", "type": "object"}}},
        {"type": "function", "function": {"name": "get_experiment_id", "description": "\n Get the experiment ID for a given experiment name. This ID is needed to get results.\n\n :param experiment_name: The name of the experiment\n :return: The experiment ID\n ", "parameters": {"properties": {"experiment_name": {"title": "Experiment Name", "type": "string"}}, "required": ["experiment_name"], "title": "get_experiment_idArguments", "type": "object"}}},
        {"type": "function", "function": {"name": "get_experiment_results", "description": "\n Get the results for an experiment ID. Requires a valid experiment ID from get_experiment_id.\n\n :param experiment_id: The experiment ID to get results for\n :return: The experiment results\n ", "parameters": {"properties": {"experiment_id": {"title": "Experiment Id", "type": "string"}}, "required": ["experiment_id"], "title": "get_experiment_resultsArguments", "type": "object"}}}
      ]
    },
    "endpoint": "/v1/chat/completions",
    "model": "gpt-4o"
  },
  "response": {
    "body": [
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-b218af7fa066", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": "assistant", "tool_calls": null}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_c98e05ca17", "usage": null, "obfuscation": "N5OTLR9CfmU"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-b218af7fa066", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 0, "id": "call_z8P1RQv54BLxyMlRdMFkcCGd", "function": {"arguments": "", "name": "get_experiment_id"}, "type": "function"}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_c98e05ca17", "usage": null, "obfuscation": "3EKK"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-b218af7fa066", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 0, "id": null, "function": {"arguments": "{\"ex", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_c98e05ca17", "usage": null, "obfuscation": "R"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-b218af7fa066", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 0, "id": null, "function": {"arguments": "perim", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_c98e05ca17", "usage": null, "obfuscation": "Q"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-b218af7fa066", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 0, "id": null, "function": {"arguments": "ent_na", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_c98e05ca17", "usage": null, "obfuscation": ""}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-b218af7fa066", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 0, "id": null, "function": {"arguments": "me\":", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_c98e05ca17", "usage": null, "obfuscation": "6"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-b218af7fa066", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 0, "id": null, "function": {"arguments": " \"boi", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_c98e05ca17", "usage": null, "obfuscation": ""}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-b218af7fa066", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 0, "id": null, "function": {"arguments": "ling_p", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_c98e05ca17", "usage": null, "obfuscation": ""}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-b218af7fa066", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 0, "id": null, "function": {"arguments": "oint", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_c98e05ca17", "usage": null, "obfuscation": "pw"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-b218af7fa066", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 0, "id": null, "function": {"arguments": "\"}", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_c98e05ca17", "usage": null, "obfuscation": "Gfk"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-b218af7fa066", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 1, "id": "call_I5tcLgyMADoVwLKDj9HkTCs5", "function": {"arguments": "", "name": "get_user_id"}, "type": "function"}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_c98e05ca17", "usage": null, "obfuscation": "Yp7IueDs5V"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-b218af7fa066", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 1, "id": null, "function": {"arguments": "{\"us", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_c98e05ca17", "usage": null, "obfuscation": "8"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-b218af7fa066", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 1, "id": null, "function": {"arguments": "ernam", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_c98e05ca17", "usage": null, "obfuscation": "X"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-b218af7fa066", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 1, "id": null, "function": {"arguments": "e\": \"c", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_c98e05ca17", "usage": null, "obfuscation": "2oif8BwVnTCnAF"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-b218af7fa066", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 1, "id": null, "function": {"arguments": "harl", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_c98e05ca17", "usage": null, "obfuscation": "hv"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-b218af7fa066", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": [{"index": 1, "id": null, "function": {"arguments": "ie\"}", "name": null}, "type": null}]}, "finish_reason": null, "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_c98e05ca17", "usage": null, "obfuscation": "C"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-b218af7fa066", "choices": [{"delta": {"content": null, "function_call": null, "refusal": null, "role": null, "tool_calls": null}, "finish_reason": "tool_calls", "index": 0, "logprobs": null}], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_c98e05ca17", "usage": null, "obfuscation": "ctjO"}},
      {"__type__": "openai.types.chat.chat_completion_chunk.ChatCompletionChunk", "__data__": {"id": "rec-b218af7fa066", "choices": [], "created": 0, "model": "gpt-4o-2024-08-06", "object": "chat.completion.chunk", "service_tier": "default", "system_fingerprint": "fp_c98e05ca17", "usage": {"completion_tokens": 51, "prompt_tokens": 393, "total_tokens": 444, "completion_tokens_details": {"accepted_prediction_tokens": 0, "audio_tokens": 0, "reasoning_tokens": 0, "rejected_prediction_tokens": 0}, "prompt_tokens_details": {"audio_tokens": 0, "cached_tokens": 0}}, "obfuscation": "fclbZeBSSKN4C"}}
    ],
    "is_streaming": true
  },
  "id_normalization_mapping": {}
}
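All three recordings end the same way: because the request sets `stream_options: {"include_usage": true}`, the final chunk carries an empty `choices` list plus the aggregate token counts. A small sketch of draining a stream while capturing that trailer, assuming openai-python chunk objects and the `stream` from the sketch above:

```python
usage = None
for chunk in stream:
    if chunk.usage is not None:
        usage = chunk.usage  # only the last, choices-less chunk populates this
print(usage.prompt_tokens, usage.completion_tokens, usage.total_tokens)
```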
1099
tests/integration/responses/recordings/b2b5903325356ef0d90af4f2bb8c2a685da5e743820a68de74640451f0072184.json
generated
Normal file
File diff suppressed because it is too large
Some files were not shown because too many files have changed in this diff