Merge remote-tracking branch 'upstream/main' into elasticsearch-integration

Enrico Zimuel 2025-10-31 18:23:42 +01:00
commit 2407115ee8
No known key found for this signature in database
GPG key ID: 6CB203F6934A69F1
1050 changed files with 65153 additions and 2821 deletions


@@ -0,0 +1,64 @@
name: Install llama-stack-client
description: Install llama-stack-client based on branch context and client-version input
inputs:
client-version:
description: 'Client version to install on non-release branches (latest or published). Ignored on release branches.'
required: false
default: ""
outputs:
uv-index-url:
description: 'UV_INDEX_URL to use (set for release branches)'
value: ${{ steps.configure.outputs.uv-index-url }}
uv-extra-index-url:
description: 'UV_EXTRA_INDEX_URL to use (set for release branches)'
value: ${{ steps.configure.outputs.uv-extra-index-url }}
install-after-sync:
description: 'Whether to install client after uv sync'
value: ${{ steps.configure.outputs.install-after-sync }}
install-source:
description: 'Where to install client from after sync'
value: ${{ steps.configure.outputs.install-source }}
runs:
using: "composite"
steps:
- name: Configure client installation
id: configure
shell: bash
run: |
# Determine the branch we're working with
BRANCH="${{ github.base_ref || github.ref }}"
BRANCH="${BRANCH#refs/heads/}"
echo "Working with branch: $BRANCH"
# On release branches: use test.pypi for uv sync, then install from git
# On non-release branches: install based on client-version after sync
if [[ "$BRANCH" =~ ^release-[0-9]+\.[0-9]+\.x$ ]]; then
echo "Detected release branch: $BRANCH"
# Check if matching branch exists in client repo
if ! git ls-remote --exit-code --heads https://github.com/llamastack/llama-stack-client-python.git "$BRANCH" > /dev/null 2>&1; then
echo "::error::Branch $BRANCH not found in llama-stack-client-python repository"
echo "::error::Please create the matching release branch in llama-stack-client-python before testing"
exit 1
fi
# Configure to use test.pypi for sync (to resolve RC versions)
echo "uv-index-url=https://test.pypi.org/simple/" >> $GITHUB_OUTPUT
echo "uv-extra-index-url=https://pypi.org/simple/" >> $GITHUB_OUTPUT
echo "install-after-sync=true" >> $GITHUB_OUTPUT
echo "install-source=git+https://github.com/llamastack/llama-stack-client-python.git@$BRANCH" >> $GITHUB_OUTPUT
elif [ "${{ inputs.client-version }}" = "latest" ]; then
# Install from main git after sync
echo "install-after-sync=true" >> $GITHUB_OUTPUT
echo "install-source=git+https://github.com/llamastack/llama-stack-client-python.git@main" >> $GITHUB_OUTPUT
elif [ "${{ inputs.client-version }}" = "published" ]; then
# Use published version from PyPI (installed by sync)
echo "install-after-sync=false" >> $GITHUB_OUTPUT
elif [ -n "${{ inputs.client-version }}" ]; then
echo "::error::Invalid client-version: ${{ inputs.client-version }}"
exit 1
fi
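A sketch of how a caller wires these outputs together (hypothetical job name; the real consumers are the setup-runner action and the pre-commit workflow further down in this commit):

```yaml
jobs:
  example:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5
      - name: Configure client installation
        id: client-config
        uses: ./.github/actions/install-llama-stack-client
        with:
          client-version: latest
      - name: Sync dependencies with the resolved indexes
        env:
          # Empty on non-release branches; test.pypi.org/pypi.org on release branches
          UV_INDEX_URL: ${{ steps.client-config.outputs.uv-index-url }}
          UV_EXTRA_INDEX_URL: ${{ steps.client-config.outputs.uv-extra-index-url }}
        run: |
          uv sync --all-groups
          # Re-install the client from git when the action requested it
          if [ "${{ steps.client-config.outputs.install-after-sync }}" = "true" ]; then
            uv pip install ${{ steps.client-config.outputs.install-source }}
          fi
```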


@@ -94,7 +94,7 @@ runs:
if: ${{ always() }}
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
with:
name: logs-${{ github.run_id }}-${{ github.run_attempt || '' }}-${{ strategy.job-index }}
name: logs-${{ github.run_id }}-${{ github.run_attempt || '1' }}-${{ strategy.job-index || github.job }}-${{ github.action }}
path: |
*.log
retention-days: 1


@@ -18,8 +18,17 @@ runs:
python-version: ${{ inputs.python-version }}
version: 0.7.6
- name: Configure client installation
id: client-config
uses: ./.github/actions/install-llama-stack-client
with:
client-version: ${{ inputs.client-version }}
- name: Install dependencies
shell: bash
env:
UV_INDEX_URL: ${{ steps.client-config.outputs.uv-index-url }}
UV_EXTRA_INDEX_URL: ${{ steps.client-config.outputs.uv-extra-index-url }}
run: |
echo "Updating project dependencies via uv sync"
uv sync --all-groups
@@ -27,16 +36,10 @@ runs:
echo "Installing ad-hoc dependencies"
uv pip install faiss-cpu
# Install llama-stack-client-python based on the client-version input
if [ "${{ inputs.client-version }}" = "latest" ]; then
echo "Installing latest llama-stack-client-python from main branch"
uv pip install git+https://github.com/llamastack/llama-stack-client-python.git@main
elif [ "${{ inputs.client-version }}" = "published" ]; then
echo "Installing published llama-stack-client-python from PyPI"
uv pip install llama-stack-client
else
echo "Invalid client-version: ${{ inputs.client-version }}"
exit 1
# Install specific client version after sync if needed
if [ "${{ steps.client-config.outputs.install-after-sync }}" = "true" ]; then
echo "Installing llama-stack-client from: ${{ steps.client-config.outputs.install-source }}"
uv pip install ${{ steps.client-config.outputs.install-source }}
fi
echo "Installed llama packages"


@@ -42,18 +42,7 @@ runs:
- name: Build Llama Stack
shell: bash
run: |
# Install llama-stack-client-python based on the client-version input
if [ "${{ inputs.client-version }}" = "latest" ]; then
echo "Installing latest llama-stack-client-python from main branch"
export LLAMA_STACK_CLIENT_DIR=git+https://github.com/llamastack/llama-stack-client-python.git@main
elif [ "${{ inputs.client-version }}" = "published" ]; then
echo "Installing published llama-stack-client-python from PyPI"
unset LLAMA_STACK_CLIENT_DIR
else
echo "Invalid client-version: ${{ inputs.client-version }}"
exit 1
fi
# Client is already installed by setup-runner (handles both main and release branches)
echo "Building Llama Stack"
LLAMA_STACK_DIR=. \


@@ -4,6 +4,7 @@ Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a tabl
| Name | File | Purpose |
| ---- | ---- | ------- |
| Backward Compatibility Check | [backward-compat.yml](backward-compat.yml) | Check backward compatibility for run.yaml configs |
| Update Changelog | [changelog.yml](changelog.yml) | Creates a PR to update CHANGELOG.md |
| API Conformance Tests | [conformance.yml](conformance.yml) | Run the API Conformance test suite on the changes. |
| Installer CI | [install-script-ci.yml](install-script-ci.yml) | Test the installation script |

.github/workflows/backward-compat.yml (vendored, new file, 578 lines)

@@ -0,0 +1,578 @@
name: Backward Compatibility Check
run-name: Check backward compatibility for run.yaml configs
on:
pull_request:
branches:
- main
- 'release-[0-9]+.[0-9]+.[0-9]+.[0-9]+'
- 'release-[0-9]+.[0-9]+.[0-9]+'
- 'release-[0-9]+.[0-9]+'
paths:
- 'src/llama_stack/core/datatypes.py'
- 'src/llama_stack/providers/datatypes.py'
- 'src/llama_stack/distributions/**/run.yaml'
- 'tests/backward_compat/**'
- '.github/workflows/backward-compat.yml'
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
check-main-compatibility:
name: Check Compatibility with main
runs-on: ubuntu-latest
steps:
- name: Checkout PR branch
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with:
fetch-depth: 0 # Need full history to access main branch
- name: Set up Python
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
with:
python-version: '3.12'
- name: Install uv
uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1
with:
enable-cache: true
- name: Install dependencies
run: |
uv sync --group dev
- name: Extract run.yaml files from main branch
id: extract_configs
run: |
# Get list of run.yaml paths from main
git fetch origin main
CONFIG_PATHS=$(git ls-tree -r --name-only origin/main | grep "src/llama_stack/distributions/.*/run.yaml$" || true)
if [ -z "$CONFIG_PATHS" ]; then
echo "No run.yaml files found in main branch"
exit 1
fi
# Extract all configs to a temp directory
mkdir -p /tmp/main_configs
echo "Extracting configs from main branch:"
while IFS= read -r config_path; do
if [ -z "$config_path" ]; then
continue
fi
# Extract filename for storage
filename=$(basename $(dirname "$config_path"))
echo " - $filename (from $config_path)"
git show origin/main:"$config_path" > "/tmp/main_configs/${filename}.yaml"
done <<< "$CONFIG_PATHS"
echo ""
echo "Extracted $(ls /tmp/main_configs/*.yaml | wc -l) config files"
- name: Test all configs from main
id: test_configs
continue-on-error: true
run: |
# Run pytest once with all configs parameterized
if COMPAT_TEST_CONFIGS_DIR=/tmp/main_configs uv run pytest tests/backward_compat/test_run_config.py -v; then
echo "failed=false" >> $GITHUB_OUTPUT
else
echo "failed=true" >> $GITHUB_OUTPUT
exit 1
fi
- name: Check for breaking change acknowledgment
id: check_ack
if: steps.test_configs.outputs.failed == 'true'
run: |
echo "Breaking changes detected. Checking for acknowledgment..."
# Check PR title for '!:' marker (conventional commits)
PR_TITLE="${{ github.event.pull_request.title }}"
if [[ "$PR_TITLE" =~ ^[a-z]+\!: ]]; then
echo "✓ Breaking change acknowledged in PR title"
echo "acknowledged=true" >> $GITHUB_OUTPUT
exit 0
fi
# Check commit messages for BREAKING CHANGE:
if git log origin/main..HEAD --format=%B | grep -q "BREAKING CHANGE:"; then
echo "✓ Breaking change acknowledged in commit message"
echo "acknowledged=true" >> $GITHUB_OUTPUT
exit 0
fi
echo "✗ Breaking change NOT acknowledged"
echo "acknowledged=false" >> $GITHUB_OUTPUT
env:
GH_TOKEN: ${{ github.token }}
- name: Evaluate results
if: always()
run: |
FAILED="${{ steps.test_configs.outputs.failed }}"
ACKNOWLEDGED="${{ steps.check_ack.outputs.acknowledged }}"
if [[ "$FAILED" == "true" ]]; then
if [[ "$ACKNOWLEDGED" == "true" ]]; then
echo ""
echo "⚠️ WARNING: Breaking changes detected but acknowledged"
echo ""
echo "This PR introduces backward-incompatible changes to run.yaml."
echo "The changes have been properly acknowledged."
echo ""
exit 0 # Pass the check
else
echo ""
echo "❌ ERROR: Breaking changes detected without acknowledgment"
echo ""
echo "This PR introduces backward-incompatible changes to run.yaml"
echo "that will break existing user configurations."
echo ""
echo "To acknowledge this breaking change, do ONE of:"
echo " 1. Add '!:' to your PR title (e.g., 'feat!: change xyz')"
echo " 2. Add the 'breaking-change' label to this PR"
echo " 3. Include 'BREAKING CHANGE:' in a commit message"
echo ""
exit 1 # Fail the check
fi
fi
test-integration-main:
name: Run Integration Tests with main Config
runs-on: ubuntu-latest
steps:
- name: Checkout PR branch
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with:
fetch-depth: 0
- name: Extract ci-tests run.yaml from main
run: |
git fetch origin main
git show origin/main:src/llama_stack/distributions/ci-tests/run.yaml > /tmp/main-ci-tests-run.yaml
echo "Extracted ci-tests run.yaml from main branch"
- name: Setup test environment
uses: ./.github/actions/setup-test-environment
with:
python-version: '3.12'
client-version: 'latest'
setup: 'ollama'
suite: 'base'
inference-mode: 'replay'
- name: Run integration tests with main config
id: test_integration
continue-on-error: true
uses: ./.github/actions/run-and-record-tests
with:
stack-config: /tmp/main-ci-tests-run.yaml
setup: 'ollama'
inference-mode: 'replay'
suite: 'base'
- name: Check for breaking change acknowledgment
id: check_ack
if: steps.test_integration.outcome == 'failure'
run: |
echo "Integration tests failed. Checking for acknowledgment..."
# Check PR title for '!:' marker (conventional commits)
PR_TITLE="${{ github.event.pull_request.title }}"
if [[ "$PR_TITLE" =~ ^[a-z]+\!: ]]; then
echo "✓ Breaking change acknowledged in PR title"
echo "acknowledged=true" >> $GITHUB_OUTPUT
exit 0
fi
# Check commit messages for BREAKING CHANGE:
if git log origin/main..HEAD --format=%B | grep -q "BREAKING CHANGE:"; then
echo "✓ Breaking change acknowledged in commit message"
echo "acknowledged=true" >> $GITHUB_OUTPUT
exit 0
fi
echo "✗ Breaking change NOT acknowledged"
echo "acknowledged=false" >> $GITHUB_OUTPUT
env:
GH_TOKEN: ${{ github.token }}
- name: Evaluate integration test results
if: always()
run: |
TEST_FAILED="${{ steps.test_integration.outcome == 'failure' }}"
ACKNOWLEDGED="${{ steps.check_ack.outputs.acknowledged }}"
if [[ "$TEST_FAILED" == "true" ]]; then
if [[ "$ACKNOWLEDGED" == "true" ]]; then
echo ""
echo "⚠️ WARNING: Integration tests failed with main config but acknowledged"
echo ""
exit 0 # Pass the check
else
echo ""
echo "❌ ERROR: Integration tests failed with main config without acknowledgment"
echo ""
echo "To acknowledge this breaking change, do ONE of:"
echo " 1. Add '!:' to your PR title (e.g., 'feat!: change xyz')"
echo " 2. Include 'BREAKING CHANGE:' in a commit message"
echo ""
exit 1 # Fail the check
fi
fi
test-integration-release:
name: Run Integration Tests with Latest Release (Informational)
runs-on: ubuntu-latest
steps:
- name: Checkout PR branch
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with:
fetch-depth: 0
- name: Get latest release
id: get_release
run: |
# Get the latest release from GitHub
LATEST_TAG=$(gh release list --limit 1 --json tagName --jq '.[0].tagName' 2>/dev/null || echo "")
if [ -z "$LATEST_TAG" ]; then
echo "No releases found, skipping release compatibility check"
echo "has_release=false" >> $GITHUB_OUTPUT
exit 0
fi
echo "Latest release: $LATEST_TAG"
echo "has_release=true" >> $GITHUB_OUTPUT
echo "tag=$LATEST_TAG" >> $GITHUB_OUTPUT
env:
GH_TOKEN: ${{ github.token }}
- name: Extract ci-tests run.yaml from release
if: steps.get_release.outputs.has_release == 'true'
id: extract_config
run: |
RELEASE_TAG="${{ steps.get_release.outputs.tag }}"
# Try with src/ prefix first (newer releases), then without (older releases)
if git show "$RELEASE_TAG:src/llama_stack/distributions/ci-tests/run.yaml" > /tmp/release-ci-tests-run.yaml 2>/dev/null; then
echo "Extracted ci-tests run.yaml from release $RELEASE_TAG (src/ path)"
echo "has_config=true" >> $GITHUB_OUTPUT
elif git show "$RELEASE_TAG:llama_stack/distributions/ci-tests/run.yaml" > /tmp/release-ci-tests-run.yaml 2>/dev/null; then
echo "Extracted ci-tests run.yaml from release $RELEASE_TAG (old path)"
echo "has_config=true" >> $GITHUB_OUTPUT
else
echo "::warning::ci-tests/run.yaml not found in release $RELEASE_TAG"
echo "has_config=false" >> $GITHUB_OUTPUT
fi
- name: Setup test environment
if: steps.get_release.outputs.has_release == 'true' && steps.extract_config.outputs.has_config == 'true'
uses: ./.github/actions/setup-test-environment
with:
python-version: '3.12'
client-version: 'latest'
setup: 'ollama'
suite: 'base'
inference-mode: 'replay'
- name: Run integration tests with release config (PR branch)
id: test_release_pr
if: steps.get_release.outputs.has_release == 'true' && steps.extract_config.outputs.has_config == 'true'
continue-on-error: true
uses: ./.github/actions/run-and-record-tests
with:
stack-config: /tmp/release-ci-tests-run.yaml
setup: 'ollama'
inference-mode: 'replay'
suite: 'base'
- name: Checkout main branch to test baseline
if: steps.get_release.outputs.has_release == 'true' && steps.extract_config.outputs.has_config == 'true'
run: |
git checkout origin/main
- name: Setup test environment for main
if: steps.get_release.outputs.has_release == 'true' && steps.extract_config.outputs.has_config == 'true'
uses: ./.github/actions/setup-test-environment
with:
python-version: '3.12'
client-version: 'latest'
setup: 'ollama'
suite: 'base'
inference-mode: 'replay'
- name: Run integration tests with release config (main branch)
id: test_release_main
if: steps.get_release.outputs.has_release == 'true' && steps.extract_config.outputs.has_config == 'true'
continue-on-error: true
uses: ./.github/actions/run-and-record-tests
with:
stack-config: /tmp/release-ci-tests-run.yaml
setup: 'ollama'
inference-mode: 'replay'
suite: 'base'
- name: Report results and post PR comment
if: always() && steps.get_release.outputs.has_release == 'true' && steps.extract_config.outputs.has_config == 'true'
run: |
RELEASE_TAG="${{ steps.get_release.outputs.tag }}"
PR_OUTCOME="${{ steps.test_release_pr.outcome }}"
MAIN_OUTCOME="${{ steps.test_release_main.outcome }}"
if [[ "$PR_OUTCOME" == "failure" && "$MAIN_OUTCOME" == "success" ]]; then
# NEW breaking change - PR fails but main passes
echo "::error::🚨 This PR introduces a NEW breaking change!"
# Check if we already posted a comment (to avoid spam on every push)
EXISTING_COMMENT=$(gh pr view ${{ github.event.pull_request.number }} --json comments --jq '.comments[] | select(.body | contains("🚨 New Breaking Change Detected") and contains("Integration tests")) | .id' | head -1)
if [[ -z "$EXISTING_COMMENT" ]]; then
gh pr comment ${{ github.event.pull_request.number }} --body "## 🚨 New Breaking Change Detected
**Integration tests against release \`$RELEASE_TAG\` are now failing**
⚠️ This PR introduces a breaking change that affects compatibility with the latest release.
- Users on release \`$RELEASE_TAG\` may not be able to upgrade
- Existing configurations may break
The tests pass on \`main\` but fail with this PR's changes.
> **Note:** This is informational only and does not block merge.
> Consider whether this breaking change is acceptable for users."
else
echo "Comment already exists, skipping to avoid spam"
fi
cat >> $GITHUB_STEP_SUMMARY <<EOF
## 🚨 NEW Breaking Change Detected
**Integration tests against release \`$RELEASE_TAG\` FAILED**
⚠️ **This PR introduces a NEW breaking change**
- Tests **PASS** on main branch ✅
- Tests **FAIL** on PR branch ❌
- Users on release \`$RELEASE_TAG\` may not be able to upgrade
- Existing configurations may break
> **Note:** This is informational only and does not block merge.
> Consider whether this breaking change is acceptable for users.
EOF
elif [[ "$PR_OUTCOME" == "failure" ]]; then
# Existing breaking change - both PR and main fail
echo "::warning::Breaking change already exists in main branch"
cat >> $GITHUB_STEP_SUMMARY <<EOF
## ⚠️ Release Compatibility Test Failed (Existing Issue)
**Integration tests against release \`$RELEASE_TAG\` FAILED**
- Tests **FAIL** on main branch ❌
- Tests **FAIL** on PR branch ❌
- This breaking change already exists in main (not introduced by this PR)
> **Note:** This is informational only.
EOF
else
# Success - tests pass
cat >> $GITHUB_STEP_SUMMARY <<EOF
## ✅ Release Compatibility Test Passed
Integration tests against release \`$RELEASE_TAG\` passed successfully.
This PR maintains compatibility with the latest release.
EOF
fi
env:
GH_TOKEN: ${{ github.token }}
check-schema-release-compatibility:
name: Check Schema Compatibility with Latest Release (Informational)
runs-on: ubuntu-latest
steps:
- name: Checkout PR branch
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with:
fetch-depth: 0
- name: Set up Python
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
with:
python-version: '3.12'
- name: Install uv
uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1
with:
enable-cache: true
- name: Install dependencies
run: |
uv sync --group dev
- name: Get latest release
id: get_release
run: |
# Get the latest release from GitHub
LATEST_TAG=$(gh release list --limit 1 --json tagName --jq '.[0].tagName' 2>/dev/null || echo "")
if [ -z "$LATEST_TAG" ]; then
echo "No releases found, skipping release compatibility check"
echo "has_release=false" >> $GITHUB_OUTPUT
exit 0
fi
echo "Latest release: $LATEST_TAG"
echo "has_release=true" >> $GITHUB_OUTPUT
echo "tag=$LATEST_TAG" >> $GITHUB_OUTPUT
env:
GH_TOKEN: ${{ github.token }}
- name: Extract configs from release
if: steps.get_release.outputs.has_release == 'true'
id: extract_release_configs
run: |
RELEASE_TAG="${{ steps.get_release.outputs.tag }}"
# Get run.yaml files from the release (try both src/ and old path)
CONFIG_PATHS=$(git ls-tree -r --name-only "$RELEASE_TAG" | grep "llama_stack/distributions/.*/run.yaml$" || true)
if [ -z "$CONFIG_PATHS" ]; then
echo "::warning::No run.yaml files found in release $RELEASE_TAG"
echo "has_configs=false" >> $GITHUB_OUTPUT
exit 0
fi
# Extract all configs to a temp directory
mkdir -p /tmp/release_configs
echo "Extracting configs from release $RELEASE_TAG:"
while IFS= read -r config_path; do
if [ -z "$config_path" ]; then
continue
fi
filename=$(basename $(dirname "$config_path"))
echo " - $filename (from $config_path)"
git show "$RELEASE_TAG:$config_path" > "/tmp/release_configs/${filename}.yaml" 2>/dev/null || true
done <<< "$CONFIG_PATHS"
echo ""
echo "Extracted $(ls /tmp/release_configs/*.yaml 2>/dev/null | wc -l) config files"
echo "has_configs=true" >> $GITHUB_OUTPUT
- name: Test against release configs (PR branch)
id: test_schema_pr
if: steps.get_release.outputs.has_release == 'true' && steps.extract_release_configs.outputs.has_configs == 'true'
continue-on-error: true
run: |
RELEASE_TAG="${{ steps.get_release.outputs.tag }}"
COMPAT_TEST_CONFIGS_DIR=/tmp/release_configs uv run pytest tests/backward_compat/test_run_config.py -v --tb=short
- name: Checkout main branch to test baseline
if: steps.get_release.outputs.has_release == 'true' && steps.extract_release_configs.outputs.has_configs == 'true'
run: |
git checkout origin/main
- name: Install dependencies for main
if: steps.get_release.outputs.has_release == 'true' && steps.extract_release_configs.outputs.has_configs == 'true'
run: |
uv sync --group dev
- name: Test against release configs (main branch)
id: test_schema_main
if: steps.get_release.outputs.has_release == 'true' && steps.extract_release_configs.outputs.has_configs == 'true'
continue-on-error: true
run: |
RELEASE_TAG="${{ steps.get_release.outputs.tag }}"
COMPAT_TEST_CONFIGS_DIR=/tmp/release_configs uv run pytest tests/backward_compat/test_run_config.py -v --tb=short
- name: Report results and post PR comment
if: always() && steps.get_release.outputs.has_release == 'true' && steps.extract_release_configs.outputs.has_configs == 'true'
run: |
RELEASE_TAG="${{ steps.get_release.outputs.tag }}"
PR_OUTCOME="${{ steps.test_schema_pr.outcome }}"
MAIN_OUTCOME="${{ steps.test_schema_main.outcome }}"
if [[ "$PR_OUTCOME" == "failure" && "$MAIN_OUTCOME" == "success" ]]; then
# NEW breaking change - PR fails but main passes
echo "::error::🚨 This PR introduces a NEW schema breaking change!"
# Check if we already posted a comment (to avoid spam on every push)
EXISTING_COMMENT=$(gh pr view ${{ github.event.pull_request.number }} --json comments --jq '.comments[] | select(.body | contains("🚨 New Schema Breaking Change Detected")) | .id' | head -1)
if [[ -z "$EXISTING_COMMENT" ]]; then
gh pr comment ${{ github.event.pull_request.number }} --body "## 🚨 New Schema Breaking Change Detected
**Schema validation against release \`$RELEASE_TAG\` is now failing**
⚠️ This PR introduces a schema breaking change that affects compatibility with the latest release.
- Users on release \`$RELEASE_TAG\` will not be able to upgrade
- Existing run.yaml configurations will fail validation
The tests pass on \`main\` but fail with this PR's changes.
> **Note:** This is informational only and does not block merge.
> Consider whether this breaking change is acceptable for users."
else
echo "Comment already exists, skipping to avoid spam"
fi
cat >> $GITHUB_STEP_SUMMARY <<EOF
## 🚨 NEW Schema Breaking Change Detected
**Schema validation against release \`$RELEASE_TAG\` FAILED**
⚠️ **This PR introduces a NEW schema breaking change**
- Tests **PASS** on main branch ✅
- Tests **FAIL** on PR branch ❌
- Users on release \`$RELEASE_TAG\` will not be able to upgrade
- Existing run.yaml configurations will fail validation
> **Note:** This is informational only and does not block merge.
> Consider whether this breaking change is acceptable for users.
EOF
elif [[ "$PR_OUTCOME" == "failure" ]]; then
# Existing breaking change - both PR and main fail
echo "::warning::Schema breaking change already exists in main branch"
cat >> $GITHUB_STEP_SUMMARY <<EOF
## ⚠️ Release Schema Compatibility Failed (Existing Issue)
**Schema validation against release \`$RELEASE_TAG\` FAILED**
- Tests **FAIL** on main branch ❌
- Tests **FAIL** on PR branch ❌
- This schema breaking change already exists in main (not introduced by this PR)
> **Note:** This is informational only.
EOF
else
# Success - tests pass
cat >> $GITHUB_STEP_SUMMARY <<EOF
## ✅ Release Schema Compatibility Passed
All run.yaml configs from release \`$RELEASE_TAG\` are compatible.
This PR maintains backward compatibility with the latest release.
EOF
fi
env:
GH_TOKEN: ${{ github.token }}
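All of the acknowledgment checks above rely on the same two signals, which can be verified locally before pushing; a minimal bash sketch (assumes `origin/main` has been fetched, with the PR title supplied by hand):

```bash
#!/usr/bin/env bash
# Signal 1: conventional-commit breaking marker in the PR title, e.g. "feat!: drop field"
PR_TITLE="feat!: change xyz"
if [[ "$PR_TITLE" =~ ^[a-z]+\!: ]]; then
  echo "PR title acknowledges the breaking change"
fi
# Signal 2: a BREAKING CHANGE footer in any commit on the branch
if git log origin/main..HEAD --format=%B | grep -q "BREAKING CHANGE:"; then
  echo "commit message acknowledges the breaking change"
fi
```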


@@ -4,13 +4,17 @@ run-name: Run the integration test suite with Kubernetes authentication
on:
push:
branches: [ main ]
branches:
- main
- 'release-[0-9]+.[0-9]+.x'
pull_request:
branches: [ main ]
branches:
- main
- 'release-[0-9]+.[0-9]+.x'
paths:
- 'distributions/**'
- 'llama_stack/**'
- '!llama_stack/ui/**'
- 'src/llama_stack/**'
- '!src/llama_stack/ui/**'
- 'tests/integration/**'
- 'uv.lock'
- 'pyproject.toml'
@@ -91,6 +95,9 @@ jobs:
conversations:
table_name: openai_conversations
backend: sql_default
prompts:
namespace: prompts
backend: kv_default
server:
port: 8321
EOF
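The `prompts` entry added above follows the same shape as the existing stores; a minimal sketch of the resulting `storage.stores` block (names taken from the hunks in this commit):

```yaml
storage:
  stores:
    metadata:
      namespace: registry
      backend: kv_default
    conversations:
      table_name: openai_conversations
      backend: sql_default
    prompts:
      namespace: prompts
      backend: kv_default
```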


@@ -4,11 +4,15 @@ run-name: Run the integration test suite with SqlStore
on:
push:
branches: [ main ]
branches:
- main
- 'release-[0-9]+.[0-9]+.x'
pull_request:
branches: [ main ]
branches:
- main
- 'release-[0-9]+.[0-9]+.x'
paths:
- 'llama_stack/providers/utils/sqlstore/**'
- 'src/llama_stack/providers/utils/sqlstore/**'
- 'tests/integration/sqlstore/**'
- 'uv.lock'
- 'pyproject.toml'
@@ -64,7 +68,7 @@ jobs:
- name: Upload test logs
if: ${{ always() }}
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
with:
name: postgres-test-logs-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.python-version }}
path: |


@@ -4,13 +4,17 @@ run-name: Run the integration test suites from tests/integration in replay mode
on:
push:
branches: [ main ]
branches:
- main
- 'release-[0-9]+.[0-9]+.x'
pull_request:
branches: [ main ]
branches:
- main
- 'release-[0-9]+.[0-9]+.x'
types: [opened, synchronize, reopened]
paths:
- 'llama_stack/**'
- '!llama_stack/ui/**'
- 'src/llama_stack/**'
- '!src/llama_stack/ui/**'
- 'tests/**'
- 'uv.lock'
- 'pyproject.toml'
@@ -47,7 +51,7 @@ jobs:
strategy:
fail-fast: false
matrix:
client-type: [library, docker]
client-type: [library, docker, server]
# Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
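The `cond && a || b` construct used for these matrix entries is the workflow-expression stand-in for a ternary; it behaves as expected here because `fromJSON(...)` always yields a truthy array (with a falsy middle operand, the `||` branch would win even when the condition holds). The same idiom with a hypothetical scalar value:

```yaml
env:
  # Resolves to "nightly" on the daily schedule, "regular" otherwise
  RUN_KIND: ${{ github.event.schedule == '0 0 * * *' && 'nightly' || 'regular' }}
```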


@@ -4,12 +4,16 @@ run-name: Run the integration test suite with various VectorIO providers
on:
push:
branches: [ main ]
branches:
- main
- 'release-[0-9]+.[0-9]+.x'
pull_request:
branches: [ main ]
branches:
- main
- 'release-[0-9]+.[0-9]+.x'
paths:
- 'llama_stack/**'
- '!llama_stack/ui/**'
- 'src/llama_stack/**'
- '!src/llama_stack/ui/**'
- 'tests/integration/vector_io/**'
- 'uv.lock'
- 'pyproject.toml'
@@ -209,7 +213,7 @@ jobs:
- name: Upload all logs to artifacts
if: ${{ always() }}
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
with:
name: vector-io-logs-${{ github.run_id }}-${{ github.run_attempt }}-${{ env.SANITIZED_PROVIDER }}-${{ matrix.python-version }}
path: |


@@ -5,7 +5,9 @@ run-name: Run pre-commit checks
on:
pull_request:
push:
branches: [main]
branches:
- main
- 'release-[0-9]+.[0-9]+.x'
concurrency:
group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}
@@ -41,25 +43,43 @@ jobs:
with:
node-version: '20'
cache: 'npm'
cache-dependency-path: 'llama_stack/ui/'
cache-dependency-path: 'src/llama_stack/ui/'
- name: Set up uv
uses: astral-sh/setup-uv@2ddd2b9cb38ad8efd50337e8ab201519a34c9f24 # v7.1.1
- name: Install npm dependencies
run: npm ci
working-directory: llama_stack/ui
working-directory: src/llama_stack/ui
- name: Install pre-commit
run: python -m pip install pre-commit
- name: Cache pre-commit
uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4
with:
path: ~/.cache/pre-commit
key: pre-commit-3|${{ env.pythonLocation }}|${{ hashFiles('.pre-commit-config.yaml') }}
- name: Run pre-commit
id: precommit
uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
continue-on-error: true
run: |
set +e
pre-commit run --show-diff-on-failure --color=always --all-files 2>&1 | tee /tmp/precommit.log
status=${PIPESTATUS[0]}
echo "status=$status" >> $GITHUB_OUTPUT
exit 0
env:
SKIP: no-commit-to-branch
SKIP: no-commit-to-branch,mypy
RUFF_OUTPUT_FORMAT: github
- name: Check pre-commit results
if: steps.precommit.outcome == 'failure'
if: steps.precommit.outputs.status != '0'
run: |
echo "::error::Pre-commit hooks failed. Please run 'pre-commit run --all-files' locally and commit the fixes."
echo "::warning::Some pre-commit hooks failed. Check the output above for details."
echo ""
echo "Failed hooks output:"
cat /tmp/precommit.log
exit 1
- name: Debug
@@ -109,3 +129,30 @@ jobs:
echo "$unstaged_files"
exit 1
fi
- name: Configure client installation
id: client-config
uses: ./.github/actions/install-llama-stack-client
- name: Sync dev + type_checking dependencies
env:
UV_INDEX_URL: ${{ steps.client-config.outputs.uv-index-url }}
UV_EXTRA_INDEX_URL: ${{ steps.client-config.outputs.uv-extra-index-url }}
run: |
uv sync --group dev --group type_checking
# Install specific client version after sync if needed
if [ "${{ steps.client-config.outputs.install-after-sync }}" = "true" ]; then
echo "Installing llama-stack-client from: ${{ steps.client-config.outputs.install-source }}"
uv pip install ${{ steps.client-config.outputs.install-source }}
fi
- name: Run mypy (full type_checking)
run: |
set +e
uv run --group dev --group type_checking mypy
status=$?
if [ $status -ne 0 ]; then
echo "::error::Full mypy failed. Reproduce locally with 'uv run pre-commit run mypy-full --hook-stage manual --all-files'."
fi
exit $status
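The `tee`/`PIPESTATUS` pattern in the pre-commit step above is what lets a step stream hook output, save it to a log, and still report the real exit code later; a standalone sketch with a hypothetical `some_linter` command:

```bash
#!/usr/bin/env bash
set +e
# Stream and capture output; plain $? would report tee's status, not the linter's
some_linter --all-files 2>&1 | tee /tmp/lint.log
status=${PIPESTATUS[0]}   # exit code of the first command in the pipeline
echo "linter exited with $status; full log in /tmp/lint.log"
exit "$status"
```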


@@ -145,12 +145,12 @@ jobs:
with:
node-version: '20'
cache: 'npm'
cache-dependency-path: 'llama_stack/ui/'
cache-dependency-path: 'src/llama_stack/ui/'
- name: Install npm dependencies
if: steps.check_author.outputs.authorized == 'true'
run: npm ci
working-directory: llama_stack/ui
working-directory: src/llama_stack/ui
- name: Run pre-commit
if: steps.check_author.outputs.authorized == 'true'


@@ -7,24 +7,24 @@ on:
branches:
- main
paths:
- 'llama_stack/cli/stack/build.py'
- 'llama_stack/cli/stack/_build.py'
- 'llama_stack/core/build.*'
- 'llama_stack/core/*.sh'
- 'src/llama_stack/cli/stack/build.py'
- 'src/llama_stack/cli/stack/_build.py'
- 'src/llama_stack/core/build.*'
- 'src/llama_stack/core/*.sh'
- '.github/workflows/providers-build.yml'
- 'llama_stack/distributions/**'
- 'src/llama_stack/distributions/**'
- 'pyproject.toml'
- 'containers/Containerfile'
- '.dockerignore'
pull_request:
paths:
- 'llama_stack/cli/stack/build.py'
- 'llama_stack/cli/stack/_build.py'
- 'llama_stack/core/build.*'
- 'llama_stack/core/*.sh'
- 'src/llama_stack/cli/stack/build.py'
- 'src/llama_stack/cli/stack/_build.py'
- 'src/llama_stack/core/build.*'
- 'src/llama_stack/core/*.sh'
- '.github/workflows/providers-build.yml'
- 'llama_stack/distributions/**'
- 'src/llama_stack/distributions/**'
- 'pyproject.toml'
- 'containers/Containerfile'
- '.dockerignore'
@@ -45,7 +45,7 @@ jobs:
- name: Generate Distribution List
id: set-matrix
run: |
distros=$(ls llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
distros=$(ls src/llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
echo "distros=$distros" >> "$GITHUB_OUTPUT"
build:
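For instance, with the `ci-tests` and `starter` distributions on disk, the pipeline above maps each build.yaml path to its directory name and emits them as a JSON matrix (illustrative input and output):

```bash
$ ls src/llama_stack/distributions/*/*build.yaml
src/llama_stack/distributions/ci-tests/build.yaml
src/llama_stack/distributions/starter/build.yaml
$ ls src/llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]'
["ci-tests","starter"]
```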
@@ -107,13 +107,13 @@ jobs:
- name: Build container image
run: |
BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "python:3.12-slim"' llama_stack/distributions/ci-tests/build.yaml)
BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "python:3.12-slim"' src/llama_stack/distributions/ci-tests/build.yaml)
docker build . \
-f containers/Containerfile \
--build-arg INSTALL_MODE=editable \
--build-arg DISTRO_NAME=ci-tests \
--build-arg BASE_IMAGE="$BASE_IMAGE" \
--build-arg RUN_CONFIG_PATH=/workspace/llama_stack/distributions/ci-tests/run.yaml \
--build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/run.yaml \
-t llama-stack:ci-tests
- name: Inspect the container image entrypoint
@@ -143,17 +143,17 @@ jobs:
run: |
yq -i '
.distribution_spec.container_image = "registry.access.redhat.com/ubi9:latest"
' llama_stack/distributions/ci-tests/build.yaml
' src/llama_stack/distributions/ci-tests/build.yaml
- name: Build UBI9 container image
run: |
BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "registry.access.redhat.com/ubi9:latest"' llama_stack/distributions/ci-tests/build.yaml)
BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "registry.access.redhat.com/ubi9:latest"' src/llama_stack/distributions/ci-tests/build.yaml)
docker build . \
-f containers/Containerfile \
--build-arg INSTALL_MODE=editable \
--build-arg DISTRO_NAME=ci-tests \
--build-arg BASE_IMAGE="$BASE_IMAGE" \
--build-arg RUN_CONFIG_PATH=/workspace/llama_stack/distributions/ci-tests/run.yaml \
--build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/run.yaml \
-t llama-stack:ci-tests-ubi9
- name: Inspect UBI9 image


@@ -7,22 +7,22 @@ on:
branches:
- main
paths:
- 'llama_stack/cli/stack/list_deps.py'
- 'llama_stack/cli/stack/_list_deps.py'
- 'llama_stack/core/build.*'
- 'llama_stack/core/*.sh'
- 'src/llama_stack/cli/stack/list_deps.py'
- 'src/llama_stack/cli/stack/_list_deps.py'
- 'src/llama_stack/core/build.*'
- 'src/llama_stack/core/*.sh'
- '.github/workflows/providers-list-deps.yml'
- 'llama_stack/templates/**'
- 'src/llama_stack/templates/**'
- 'pyproject.toml'
pull_request:
paths:
- 'llama_stack/cli/stack/list_deps.py'
- 'llama_stack/cli/stack/_list_deps.py'
- 'llama_stack/core/build.*'
- 'llama_stack/core/*.sh'
- 'src/llama_stack/cli/stack/list_deps.py'
- 'src/llama_stack/cli/stack/_list_deps.py'
- 'src/llama_stack/core/build.*'
- 'src/llama_stack/core/*.sh'
- '.github/workflows/providers-list-deps.yml'
- 'llama_stack/templates/**'
- 'src/llama_stack/templates/**'
- 'pyproject.toml'
concurrency:
@@ -41,7 +41,7 @@ jobs:
- name: Generate Distribution List
id: set-matrix
run: |
distros=$(ls llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
distros=$(ls src/llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
echo "distros=$distros" >> "$GITHUB_OUTPUT"
list-deps:
@@ -102,4 +102,4 @@ jobs:
USE_COPY_NOT_MOUNT: "true"
LLAMA_STACK_DIR: "."
run: |
uv run llama stack list-deps llama_stack/distributions/ci-tests/build.yaml
uv run llama stack list-deps src/llama_stack/distributions/ci-tests/build.yaml


@@ -10,7 +10,7 @@ on:
branches:
- main
paths-ignore:
- 'llama_stack/ui/**'
- 'src/llama_stack/ui/**'
jobs:
build:
@@ -24,7 +24,7 @@ jobs:
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
- name: Install uv
uses: astral-sh/setup-uv@3259c6206f993105e3a61b142c2d97bf4b9ef83d # v7.1.0
uses: astral-sh/setup-uv@2ddd2b9cb38ad8efd50337e8ab201519a34c9f24 # v7.1.1
with:
python-version: ${{ matrix.python-version }}
activate-environment: true


@@ -8,7 +8,7 @@ on:
pull_request:
branches: [ main ]
paths:
- 'llama_stack/**'
- 'src/llama_stack/**'
- 'tests/integration/**'
- 'uv.lock'
- 'pyproject.toml'
@@ -78,7 +78,7 @@ jobs:
- name: Upload all logs to artifacts
if: ${{ always() }}
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
with:
name: logs-${{ github.run_id }}-${{ github.run_attempt }}-external-provider-module-test
path: |


@@ -8,8 +8,8 @@ on:
pull_request:
branches: [ main ]
paths:
- 'llama_stack/**'
- '!llama_stack/ui/**'
- 'src/llama_stack/**'
- '!src/llama_stack/ui/**'
- 'tests/integration/**'
- 'uv.lock'
- 'pyproject.toml'
@@ -84,7 +84,7 @@ jobs:
- name: Upload all logs to artifacts
if: ${{ always() }}
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
with:
name: logs-${{ github.run_id }}-${{ github.run_attempt }}-external-test
path: |


@@ -8,7 +8,7 @@ on:
pull_request:
branches: [ main ]
paths:
- 'llama_stack/ui/**'
- 'src/llama_stack/ui/**'
- '.github/workflows/ui-unit-tests.yml' # This workflow
workflow_dispatch:
@@ -33,22 +33,22 @@ jobs:
with:
node-version: ${{ matrix.node-version }}
cache: 'npm'
cache-dependency-path: 'llama_stack/ui/package-lock.json'
cache-dependency-path: 'src/llama_stack/ui/package-lock.json'
- name: Install dependencies
working-directory: llama_stack/ui
working-directory: src/llama_stack/ui
run: npm ci
- name: Run linting
working-directory: llama_stack/ui
working-directory: src/llama_stack/ui
run: npm run lint
- name: Run format check
working-directory: llama_stack/ui
working-directory: src/llama_stack/ui
run: npm run format:check
- name: Run unit tests
working-directory: llama_stack/ui
working-directory: src/llama_stack/ui
env:
CI: true


@@ -4,12 +4,16 @@ run-name: Run the unit test suite
on:
push:
branches: [ main ]
branches:
- main
- 'release-[0-9]+.[0-9]+.x'
pull_request:
branches: [ main ]
branches:
- main
- 'release-[0-9]+.[0-9]+.x'
paths:
- 'llama_stack/**'
- '!llama_stack/ui/**'
- 'src/llama_stack/**'
- '!src/llama_stack/ui/**'
- 'tests/unit/**'
- 'uv.lock'
- 'pyproject.toml'
@@ -45,7 +49,7 @@ jobs:
- name: Upload test results
if: always()
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
with:
name: test-results-${{ matrix.python }}
path: |

.gitignore (vendored, 3 lines)

@@ -32,3 +32,6 @@ CLAUDE.md
docs/.docusaurus/
docs/node_modules/
docs/static/imported-files/
docs/docs/api-deprecated/
docs/docs/api-experimental/
docs/docs/api/


@@ -42,7 +42,7 @@ repos:
hooks:
- id: ruff
args: [ --fix ]
exclude: ^llama_stack/strong_typing/.*$
exclude: ^src/llama_stack/strong_typing/.*$
- id: ruff-format
- repo: https://github.com/adamchainz/blacken-docs
@@ -58,18 +58,27 @@ repos:
- id: uv-lock
- repo: https://github.com/pre-commit/mirrors-mypy
rev: v1.16.1
rev: v1.18.2
hooks:
- id: mypy
additional_dependencies:
- uv==0.6.2
- mypy
- pytest
- rich
- types-requests
- pydantic
- httpx
pass_filenames: false
- repo: local
hooks:
- id: mypy-full
name: mypy (full type_checking)
entry: uv run --group dev --group type_checking mypy
language: system
pass_filenames: false
stages: [manual]
# - repo: https://github.com/tcort/markdown-link-check
# rev: v3.11.2
# hooks:
@@ -86,7 +95,7 @@ repos:
language: python
pass_filenames: false
require_serial: true
files: ^llama_stack/distributions/.*$|^llama_stack/providers/.*/inference/.*/models\.py$
files: ^src/llama_stack/distributions/.*$|^src/llama_stack/providers/.*/inference/.*/models\.py$
- id: provider-codegen
name: Provider Codegen
additional_dependencies:
@@ -95,7 +104,7 @@ repos:
language: python
pass_filenames: false
require_serial: true
files: ^llama_stack/providers/.*$
files: ^src/llama_stack/providers/.*$
- id: openapi-codegen
name: API Spec Codegen
additional_dependencies:
@@ -104,7 +113,7 @@ repos:
language: python
pass_filenames: false
require_serial: true
files: ^llama_stack/apis/|^docs/openapi_generator/
files: ^src/llama_stack/apis/|^docs/openapi_generator/
- id: check-workflows-use-hashes
name: Check GitHub Actions use SHA-pinned actions
entry: ./scripts/check-workflows-use-hashes.sh
@@ -120,7 +129,7 @@ repos:
pass_filenames: false
require_serial: true
always_run: true
files: ^llama_stack/.*$
files: ^src/llama_stack/.*$
- id: forbid-pytest-asyncio
name: Block @pytest.mark.asyncio and @pytest_asyncio.fixture
entry: bash
@@ -150,10 +159,9 @@ repos:
name: Format & Lint UI
entry: bash ./scripts/run-ui-linter.sh
language: system
files: ^llama_stack/ui/.*\.(ts|tsx)$
files: ^src/llama_stack/ui/.*\.(ts|tsx)$
pass_filenames: false
require_serial: true
- id: check-log-usage
name: Ensure 'llama_stack.log' usage for logging
entry: bash
@@ -172,7 +180,23 @@ repos:
exit 1
fi
exit 0
- id: fips-compliance
name: Ensure llama-stack remains FIPS compliant
entry: bash
language: system
types: [python]
pass_filenames: true
exclude: '^tests/.*$' # Exclude test dir as some safety tests used MD5
args:
- -c
- |
grep -EnH '^[^#]*\b(md5|sha1|uuid3|uuid5)\b' "$@" && {
echo;
echo "❌ Do not use any of the following functions: hashlib.md5, hashlib.sha1, uuid.uuid3, uuid.uuid5"
echo " These functions are not FIPS-compliant"
echo;
exit 1;
} || true
ci:
autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks
autoupdate_commit_msg: ⬆ [pre-commit.ci] pre-commit autoupdate
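A quick way to see what the new FIPS hook above flags, using a throwaway file (illustrative):

```bash
$ cat > /tmp/fips_demo.py <<'EOF'
import hashlib
digest = hashlib.md5(b"payload").hexdigest()
EOF
$ grep -EnH '^[^#]*\b(md5|sha1|uuid3|uuid5)\b' /tmp/fips_demo.py
/tmp/fips_demo.py:2:digest = hashlib.md5(b"payload").hexdigest()
```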


@@ -61,6 +61,18 @@ uv run pre-commit run --all-files -v
The `-v` (verbose) parameter is optional but often helpful for getting more information about any issues that the pre-commit checks identify.
To run the expanded mypy configuration that CI enforces, use:
```bash
uv run pre-commit run mypy-full --hook-stage manual --all-files
```
or invoke mypy directly with all optional dependencies:
```bash
uv run --group dev --group type_checking mypy
```
```{caution}
Before pushing your changes, make sure that the pre-commit hooks have passed successfully.
```


@@ -1,11 +1,11 @@
include pyproject.toml
include llama_stack/models/llama/llama3/tokenizer.model
include llama_stack/models/llama/llama4/tokenizer.model
include llama_stack/core/*.sh
include llama_stack/cli/scripts/*.sh
include llama_stack/distributions/*/*.yaml
exclude llama_stack/distributions/ci-tests
include src/llama_stack/models/llama/llama3/tokenizer.model
include src/llama_stack/models/llama/llama4/tokenizer.model
include src/llama_stack/core/*.sh
include src/llama_stack/cli/scripts/*.sh
include src/llama_stack/distributions/*/*.yaml
exclude src/llama_stack/distributions/ci-tests
include tests/integration/test_cases/inference/*.json
include llama_stack/models/llama/*/*.md
include llama_stack/tests/integration/*.jpg
prune llama_stack/distributions/ci-tests
include src/llama_stack/models/llama/*/*.md
include src/llama_stack/tests/integration/*.jpg
prune src/llama_stack/distributions/ci-tests


@@ -44,14 +44,6 @@ data:
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
files:
- provider_id: meta-reference-files
provider_type: inline::localfs
config:
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
metadata_store:
type: sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
@@ -115,13 +107,21 @@ data:
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
references:
stores:
metadata:
backend: kv_default
namespace: registry
inference:
backend: sql_default
table_name: inference_store
max_write_queue_size: 10000
num_writers: 4
conversations:
backend: sql_default
table_name: openai_conversations
prompts:
backend: kv_default
namespace: prompts
models:
- metadata:
embedding_dimension: 768


@@ -36,14 +36,6 @@ providers:
persistence:
namespace: vector_io::chroma_remote
backend: kv_default
files:
- provider_id: meta-reference-files
provider_type: inline::localfs
config:
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
metadata_store:
table_name: files_metadata
backend: sql_default
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
@@ -108,6 +100,9 @@ storage:
conversations:
table_name: openai_conversations
backend: sql_default
prompts:
namespace: prompts
backend: kv_default
registered_resources:
models:
- metadata:


@@ -1,610 +0,0 @@
# yaml-language-server: $schema=https://app.stainlessapi.com/config-internal.schema.json
organization:
# Name of your organization or company, used to determine the name of the client
# and headings.
name: llama-stack-client
docs: https://llama-stack.readthedocs.io/en/latest/
contact: llamastack@meta.com
security:
- {}
- BearerAuth: []
security_schemes:
BearerAuth:
type: http
scheme: bearer
# `targets` define the output targets and their customization options, such as
# whether to emit the Node SDK and what it's package name should be.
targets:
node:
package_name: llama-stack-client
production_repo: llamastack/llama-stack-client-typescript
publish:
npm: false
python:
package_name: llama_stack_client
production_repo: llamastack/llama-stack-client-python
options:
use_uv: true
publish:
pypi: true
project_name: llama_stack_client
kotlin:
reverse_domain: com.llama_stack_client.api
production_repo: null
publish:
maven: false
go:
package_name: llama-stack-client
production_repo: llamastack/llama-stack-client-go
options:
enable_v2: true
back_compat_use_shared_package: false
# `client_settings` define settings for the API client, such as extra constructor
# arguments (used for authentication), retry behavior, idempotency, etc.
client_settings:
default_env_prefix: LLAMA_STACK_CLIENT
opts:
api_key:
type: string
read_env: LLAMA_STACK_CLIENT_API_KEY
auth: { security_scheme: BearerAuth }
nullable: true
# `environments` are a map of the name of the environment (e.g. "sandbox",
# "production") to the corresponding url to use.
environments:
production: http://any-hosted-llama-stack.com
# `pagination` defines [pagination schemes] which provides a template to match
# endpoints and generate next-page and auto-pagination helpers in the SDKs.
pagination:
- name: datasets_iterrows
type: offset
request:
dataset_id:
type: string
start_index:
type: integer
x-stainless-pagination-property:
purpose: offset_count_param
limit:
type: integer
response:
data:
type: array
items:
type: object
next_index:
type: integer
x-stainless-pagination-property:
purpose: offset_count_start_field
- name: openai_cursor_page
type: cursor
request:
limit:
type: integer
after:
type: string
x-stainless-pagination-property:
purpose: next_cursor_param
response:
data:
type: array
items: {}
has_more:
type: boolean
last_id:
type: string
x-stainless-pagination-property:
purpose: next_cursor_field
# `resources` define the structure and organization for your API, such as how
# methods and models are grouped together and accessed. See the [configuration
# guide] for more information.
#
# [configuration guide]:
# https://app.stainlessapi.com/docs/guides/configure#resources
resources:
$shared:
models:
agent_config: AgentConfig
interleaved_content_item: InterleavedContentItem
interleaved_content: InterleavedContent
param_type: ParamType
safety_violation: SafetyViolation
sampling_params: SamplingParams
scoring_result: ScoringResult
message: Message
user_message: UserMessage
completion_message: CompletionMessage
tool_response_message: ToolResponseMessage
system_message: SystemMessage
tool_call: ToolCall
query_result: RAGQueryResult
document: RAGDocument
query_config: RAGQueryConfig
response_format: ResponseFormat
toolgroups:
models:
tool_group: ToolGroup
list_tool_groups_response: ListToolGroupsResponse
methods:
register: post /v1/toolgroups
get: get /v1/toolgroups/{toolgroup_id}
list: get /v1/toolgroups
unregister: delete /v1/toolgroups/{toolgroup_id}
tools:
methods:
get: get /v1/tools/{tool_name}
list:
endpoint: get /v1/tools
paginated: false
tool_runtime:
models:
tool_def: ToolDef
tool_invocation_result: ToolInvocationResult
methods:
list_tools:
endpoint: get /v1/tool-runtime/list-tools
paginated: false
invoke_tool: post /v1/tool-runtime/invoke
subresources:
rag_tool:
methods:
insert: post /v1/tool-runtime/rag-tool/insert
query: post /v1/tool-runtime/rag-tool/query
responses:
models:
response_object_stream: OpenAIResponseObjectStream
response_object: OpenAIResponseObject
methods:
create:
type: http
endpoint: post /v1/responses
streaming:
stream_event_model: responses.response_object_stream
param_discriminator: stream
retrieve: get /v1/responses/{response_id}
list:
type: http
endpoint: get /v1/responses
delete:
type: http
endpoint: delete /v1/responses/{response_id}
subresources:
input_items:
methods:
list:
type: http
endpoint: get /v1/responses/{response_id}/input_items
conversations:
models:
conversation_object: Conversation
methods:
create:
type: http
endpoint: post /v1/conversations
retrieve: get /v1/conversations/{conversation_id}
update:
type: http
endpoint: post /v1/conversations/{conversation_id}
delete:
type: http
endpoint: delete /v1/conversations/{conversation_id}
subresources:
items:
methods:
get:
type: http
endpoint: get /v1/conversations/{conversation_id}/items/{item_id}
list:
type: http
endpoint: get /v1/conversations/{conversation_id}/items
create:
type: http
endpoint: post /v1/conversations/{conversation_id}/items
inspect:
models:
healthInfo: HealthInfo
providerInfo: ProviderInfo
routeInfo: RouteInfo
versionInfo: VersionInfo
methods:
health: get /v1/health
version: get /v1/version
embeddings:
models:
create_embeddings_response: OpenAIEmbeddingsResponse
methods:
create: post /v1/embeddings
chat:
models:
chat_completion_chunk: OpenAIChatCompletionChunk
subresources:
completions:
methods:
create:
type: http
endpoint: post /v1/chat/completions
streaming:
stream_event_model: chat.chat_completion_chunk
param_discriminator: stream
list:
type: http
endpoint: get /v1/chat/completions
retrieve:
type: http
endpoint: get /v1/chat/completions/{completion_id}
completions:
methods:
create:
type: http
endpoint: post /v1/completions
streaming:
param_discriminator: stream
vector_io:
models:
queryChunksResponse: QueryChunksResponse
methods:
insert: post /v1/vector-io/insert
query: post /v1/vector-io/query
vector_stores:
models:
vector_store: VectorStoreObject
list_vector_stores_response: VectorStoreListResponse
vector_store_delete_response: VectorStoreDeleteResponse
vector_store_search_response: VectorStoreSearchResponsePage
methods:
create: post /v1/vector_stores
list:
endpoint: get /v1/vector_stores
retrieve: get /v1/vector_stores/{vector_store_id}
update: post /v1/vector_stores/{vector_store_id}
delete: delete /v1/vector_stores/{vector_store_id}
search: post /v1/vector_stores/{vector_store_id}/search
subresources:
files:
models:
vector_store_file: VectorStoreFileObject
methods:
list: get /v1/vector_stores/{vector_store_id}/files
retrieve: get /v1/vector_stores/{vector_store_id}/files/{file_id}
update: post /v1/vector_stores/{vector_store_id}/files/{file_id}
delete: delete /v1/vector_stores/{vector_store_id}/files/{file_id}
create: post /v1/vector_stores/{vector_store_id}/files
content: get /v1/vector_stores/{vector_store_id}/files/{file_id}/content
file_batches:
models:
vector_store_file_batches: VectorStoreFileBatchObject
list_vector_store_files_in_batch_response: VectorStoreFilesListInBatchResponse
methods:
create: post /v1/vector_stores/{vector_store_id}/file_batches
retrieve: get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}
list_files: get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files
cancel: post /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel
models:
models:
model: Model
list_models_response: ListModelsResponse
methods:
retrieve: get /v1/models/{model_id}
list:
endpoint: get /v1/models
paginated: false
register: post /v1/models
unregister: delete /v1/models/{model_id}
subresources:
openai:
methods:
list:
endpoint: get /v1/models
paginated: false
providers:
models:
list_providers_response: ListProvidersResponse
methods:
list:
endpoint: get /v1/providers
paginated: false
retrieve: get /v1/providers/{provider_id}
routes:
models:
list_routes_response: ListRoutesResponse
methods:
list:
endpoint: get /v1/inspect/routes
paginated: false
moderations:
models:
create_response: ModerationObject
methods:
create: post /v1/moderations
safety:
models:
run_shield_response: RunShieldResponse
methods:
run_shield: post /v1/safety/run-shield
shields:
models:
shield: Shield
list_shields_response: ListShieldsResponse
methods:
retrieve: get /v1/shields/{identifier}
list:
endpoint: get /v1/shields
paginated: false
register: post /v1/shields
delete: delete /v1/shields/{identifier}
synthetic_data_generation:
models:
syntheticDataGenerationResponse: SyntheticDataGenerationResponse
methods:
generate: post /v1/synthetic-data-generation/generate
telemetry:
models:
span_with_status: SpanWithStatus
trace: Trace
query_spans_response: QuerySpansResponse
event: Event
query_condition: QueryCondition
methods:
query_traces:
endpoint: post /v1alpha/telemetry/traces
skip_test_reason: 'unsupported query params in java / kotlin'
get_span_tree: post /v1alpha/telemetry/spans/{span_id}/tree
query_spans:
endpoint: post /v1alpha/telemetry/spans
skip_test_reason: 'unsupported query params in java / kotlin'
query_metrics:
endpoint: post /v1alpha/telemetry/metrics/{metric_name}
skip_test_reason: 'unsupported query params in java / kotlin'
# log_event: post /v1alpha/telemetry/events
save_spans_to_dataset: post /v1alpha/telemetry/spans/export
get_span: get /v1alpha/telemetry/traces/{trace_id}/spans/{span_id}
get_trace: get /v1alpha/telemetry/traces/{trace_id}
scoring:
methods:
score: post /v1/scoring/score
score_batch: post /v1/scoring/score-batch
scoring_functions:
methods:
retrieve: get /v1/scoring-functions/{scoring_fn_id}
list:
endpoint: get /v1/scoring-functions
paginated: false
register: post /v1/scoring-functions
models:
scoring_fn: ScoringFn
scoring_fn_params: ScoringFnParams
list_scoring_functions_response: ListScoringFunctionsResponse
benchmarks:
methods:
retrieve: get /v1alpha/eval/benchmarks/{benchmark_id}
list:
endpoint: get /v1alpha/eval/benchmarks
paginated: false
register: post /v1alpha/eval/benchmarks
models:
benchmark: Benchmark
list_benchmarks_response: ListBenchmarksResponse
files:
methods:
create: post /v1/files
list: get /v1/files
retrieve: get /v1/files/{file_id}
delete: delete /v1/files/{file_id}
content: get /v1/files/{file_id}/content
models:
file: OpenAIFileObject
list_files_response: ListOpenAIFileResponse
delete_file_response: OpenAIFileDeleteResponse
alpha:
subresources:
inference:
methods:
rerank: post /v1alpha/inference/rerank
post_training:
models:
algorithm_config: AlgorithmConfig
post_training_job: PostTrainingJob
list_post_training_jobs_response: ListPostTrainingJobsResponse
methods:
preference_optimize: post /v1alpha/post-training/preference-optimize
supervised_fine_tune: post /v1alpha/post-training/supervised-fine-tune
subresources:
job:
methods:
artifacts: get /v1alpha/post-training/job/artifacts
cancel: post /v1alpha/post-training/job/cancel
status: get /v1alpha/post-training/job/status
list:
endpoint: get /v1alpha/post-training/jobs
paginated: false
eval:
methods:
evaluate_rows: post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations
run_eval: post /v1alpha/eval/benchmarks/{benchmark_id}/jobs
evaluate_rows_alpha: post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations
run_eval_alpha: post /v1alpha/eval/benchmarks/{benchmark_id}/jobs
subresources:
jobs:
methods:
cancel: delete /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}
status: get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}
retrieve: get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result
models:
evaluate_response: EvaluateResponse
benchmark_config: BenchmarkConfig
job: Job
agents:
methods:
create: post /v1alpha/agents
list: get /v1alpha/agents
retrieve: get /v1alpha/agents/{agent_id}
delete: delete /v1alpha/agents/{agent_id}
models:
inference_step: InferenceStep
tool_execution_step: ToolExecutionStep
tool_response: ToolResponse
shield_call_step: ShieldCallStep
memory_retrieval_step: MemoryRetrievalStep
subresources:
session:
models:
session: Session
methods:
list: get /v1alpha/agents/{agent_id}/sessions
create: post /v1alpha/agents/{agent_id}/session
delete: delete /v1alpha/agents/{agent_id}/session/{session_id}
retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}
steps:
methods:
retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}
turn:
models:
turn: Turn
turn_response_event: AgentTurnResponseEvent
agent_turn_response_stream_chunk: AgentTurnResponseStreamChunk
methods:
create:
type: http
endpoint: post /v1alpha/agents/{agent_id}/session/{session_id}/turn
streaming:
stream_event_model: alpha.agents.turn.agent_turn_response_stream_chunk
param_discriminator: stream
retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}
resume:
type: http
endpoint: post /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume
streaming:
stream_event_model: alpha.agents.turn.agent_turn_response_stream_chunk
param_discriminator: stream
beta:
subresources:
datasets:
models:
list_datasets_response: ListDatasetsResponse
methods:
register: post /v1beta/datasets
retrieve: get /v1beta/datasets/{dataset_id}
list:
endpoint: get /v1beta/datasets
paginated: false
unregister: delete /v1beta/datasets/{dataset_id}
iterrows: get /v1beta/datasetio/iterrows/{dataset_id}
appendrows: post /v1beta/datasetio/append-rows/{dataset_id}
settings:
license: MIT
unwrap_response_fields: [ data ]
openapi:
transformations:
- command: renameValue
reason: pydantic reserved name
args:
filter:
only:
- '$.components.schemas.InferenceStep.properties.model_response'
rename:
python:
property_name: 'inference_model_response'
# - command: renameValue
# reason: pydantic reserved name
# args:
# filter:
# only:
# - '$.components.schemas.Model.properties.model_type'
# rename:
# python:
# property_name: 'type'
- command: mergeObject
reason: Better return_type using enum
args:
target:
- '$.components.schemas'
object:
ReturnType:
additionalProperties: false
properties:
type:
enum:
- string
- number
- boolean
- array
- object
- json
- union
- chat_completion_input
- completion_input
- agent_turn_input
required:
- type
type: object
- command: replaceProperties
reason: Replace return type properties with better model (see above)
args:
filter:
only:
- '$.components.schemas.ScoringFn.properties.return_type'
- '$.components.schemas.RegisterScoringFunctionRequest.properties.return_type'
value:
$ref: '#/components/schemas/ReturnType'
- command: oneOfToAnyOf
reason: Prism (mock server) doesn't like one of our requests as it technically matches multiple variants
- reason: For better names
command: extractToRefs
args:
ref:
target: '$.components.schemas.ToolCallDelta.properties.tool_call'
name: '#/components/schemas/ToolCallOrString'
# `readme` is used to configure the code snippets that will be rendered in the
# README.md of various SDKs. In particular, you can change the `headline`
# snippet's endpoint and the arguments to call it with.
readme:
example_requests:
default:
type: request
endpoint: post /v1/chat/completions
params: &ref_0 {}
headline:
type: request
endpoint: post /v1/models
params: *ref_0
pagination:
type: request
endpoint: post /v1/chat/completions
params: {}

View file

@ -15,6 +15,141 @@ info:
servers:
- url: http://any-hosted-llama-stack.com
paths:
/v1/batches:
get:
responses:
'200':
description: A list of batch objects.
content:
application/json:
schema:
$ref: '#/components/schemas/ListBatchesResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: List all batches for the current user.
description: List all batches for the current user.
parameters:
- name: after
in: query
description: >-
A cursor for pagination; returns batches after this batch ID.
required: false
schema:
type: string
- name: limit
in: query
description: >-
Number of batches to return (default 20, max 100).
          required: false
schema:
type: integer
deprecated: false
post:
responses:
'200':
description: The created batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Create a new batch for processing multiple API requests.
description: >-
Create a new batch for processing multiple API requests.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CreateBatchRequest'
required: true
deprecated: false
/v1/batches/{batch_id}:
get:
responses:
'200':
description: The batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Retrieve information about a specific batch.
description: >-
Retrieve information about a specific batch.
parameters:
- name: batch_id
in: path
description: The ID of the batch to retrieve.
required: true
schema:
type: string
deprecated: false
/v1/batches/{batch_id}/cancel:
post:
responses:
'200':
description: The updated batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: Cancel a batch that is in progress.
description: Cancel a batch that is in progress.
parameters:
- name: batch_id
in: path
description: The ID of the batch to cancel.
required: true
schema:
type: string
deprecated: false
/v1/chat/completions:
get:
responses:
@ -4212,6 +4347,331 @@ components:
title: Error
description: >-
Error response from the API. Roughly follows RFC 7807.
ListBatchesResponse:
type: object
properties:
object:
type: string
const: list
default: list
data:
type: array
items:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
first_id:
type: string
last_id:
type: string
has_more:
type: boolean
default: false
additionalProperties: false
required:
- object
- data
- has_more
title: ListBatchesResponse
description: >-
Response containing a list of batch objects.
CreateBatchRequest:
type: object
properties:
input_file_id:
type: string
description: >-
The ID of an uploaded file containing requests for the batch.
endpoint:
type: string
description: >-
The endpoint to be used for all requests in the batch.
completion_window:
type: string
const: 24h
description: >-
The time window within which the batch should be processed.
metadata:
type: object
additionalProperties:
type: string
description: Optional metadata for the batch.
idempotency_key:
type: string
description: >-
Optional idempotency key. When provided, enables idempotent behavior.
additionalProperties: false
required:
- input_file_id
- endpoint
- completion_window
title: CreateBatchRequest
Batch:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
Order:
type: string
enum:
@ -5474,11 +5934,44 @@ components:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
- $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage'
- $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile'
discriminator:
propertyName: type
mapping:
input_text: '#/components/schemas/OpenAIResponseInputMessageContentText'
input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage'
input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile'
OpenAIResponseInputMessageContentFile:
type: object
properties:
type:
type: string
const: input_file
default: input_file
description: >-
The type of the input item. Always `input_file`.
file_data:
type: string
description: >-
            (Optional) The data of the file to be sent to the model.
file_id:
type: string
description: >-
(Optional) The ID of the file to be sent to the model.
file_url:
type: string
description: >-
            (Optional) The URL of the file to be sent to the model.
filename:
type: string
description: >-
            (Optional) The name of the file to be sent to the model.
additionalProperties: false
required:
- type
title: OpenAIResponseInputMessageContentFile
description: >-
File content for input messages in OpenAI response format.
OpenAIResponseInputMessageContentImage:
type: object
properties:
@ -5499,6 +5992,10 @@ components:
default: input_image
description: >-
Content type identifier, always "input_image"
file_id:
type: string
description: >-
(Optional) The ID of the file to be sent to the model.
image_url:
type: string
description: (Optional) URL of the image content
@ -6735,14 +7232,9 @@ components:
Error details for failed OpenAI response requests.
OpenAIResponseInput:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
- $ref: '#/components/schemas/OpenAIResponseOutput'
- $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
- $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
- $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
- $ref: '#/components/schemas/OpenAIResponseMessage'
OpenAIResponseInputToolFileSearch:
type: object
@ -6898,6 +7390,10 @@ components:
type: string
description: >-
(Optional) ID of the previous response in a conversation
prompt:
$ref: '#/components/schemas/OpenAIResponsePrompt'
description: >-
(Optional) Reference to a prompt template and its variables.
status:
type: string
description: >-
@ -6971,6 +7467,30 @@ components:
mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
OpenAIResponsePrompt:
type: object
properties:
id:
type: string
description: Unique identifier of the prompt template
variables:
type: object
additionalProperties:
$ref: '#/components/schemas/OpenAIResponseInputMessageContent'
description: >-
            Dictionary mapping variable names to OpenAIResponseInputMessageContent
            structures for template substitution. The substitution values can be
            either strings or other Response input types, such as images or files.
version:
type: string
description: >-
Version number of the prompt to use (defaults to latest if not specified)
additionalProperties: false
required:
- id
title: OpenAIResponsePrompt
description: >-
OpenAI compatible Prompt object that is used in OpenAI responses.
OpenAIResponseText:
type: object
properties:
@ -7228,6 +7748,10 @@ components:
model:
type: string
description: The underlying LLM used for completions.
prompt:
$ref: '#/components/schemas/OpenAIResponsePrompt'
description: >-
(Optional) Prompt object with ID, version, and variables.
instructions:
type: string
previous_response_id:
@ -7305,6 +7829,10 @@ components:
type: string
description: >-
(Optional) ID of the previous response in a conversation
prompt:
$ref: '#/components/schemas/OpenAIResponsePrompt'
description: >-
(Optional) Reference to a prompt template and its variables.
status:
type: string
description: >-
@ -9867,7 +10395,7 @@ components:
$ref: '#/components/schemas/RAGDocument'
description: >-
List of documents to index in the RAG system
vector_db_id:
vector_store_id:
type: string
description: >-
ID of the vector database to store the document embeddings
@ -9878,7 +10406,7 @@ components:
additionalProperties: false
required:
- documents
- vector_db_id
- vector_store_id
- chunk_size_in_tokens
title: InsertRequest
DefaultRAGQueryGeneratorConfig:
@ -10049,7 +10577,7 @@ components:
$ref: '#/components/schemas/InterleavedContent'
description: >-
The query content to search for in the indexed documents
vector_db_ids:
vector_store_ids:
type: array
items:
type: string
@ -10062,7 +10590,7 @@ components:
additionalProperties: false
required:
- content
- vector_db_ids
- vector_store_ids
title: QueryRequest
RAGQueryResult:
type: object
@ -10190,6 +10718,10 @@ components:
description: >-
The content of the chunk, which can be interleaved text, images, or other
types.
chunk_id:
type: string
description: >-
Unique identifier for the chunk. Must be provided explicitly.
metadata:
type: object
additionalProperties:
@ -10210,10 +10742,6 @@ components:
description: >-
Optional embedding for the chunk. If not provided, it will be computed
later.
stored_chunk_id:
type: string
description: >-
The chunk ID that is stored in the vector database. Used for backend functionality.
chunk_metadata:
$ref: '#/components/schemas/ChunkMetadata'
description: >-
@ -10222,6 +10750,7 @@ components:
additionalProperties: false
required:
- content
- chunk_id
- metadata
title: Chunk
description: >-
@ -10286,7 +10815,7 @@ components:
InsertChunksRequest:
type: object
properties:
vector_db_id:
vector_store_id:
type: string
description: >-
The identifier of the vector database to insert the chunks into.
@ -10305,13 +10834,13 @@ components:
description: The time to live of the chunks.
additionalProperties: false
required:
- vector_db_id
- vector_store_id
- chunks
title: InsertChunksRequest
QueryChunksRequest:
type: object
properties:
vector_db_id:
vector_store_id:
type: string
description: >-
The identifier of the vector database to query.
@ -10331,7 +10860,7 @@ components:
description: The parameters of the query.
additionalProperties: false
required:
- vector_db_id
- vector_store_id
- query
title: QueryChunksRequest
QueryChunksResponse:
@ -11600,7 +12129,6 @@ components:
description: The sampling strategy.
max_tokens:
type: integer
default: 0
description: >-
The maximum number of tokens that can be generated in the completion.
The token count of your prompt plus max_tokens cannot exceed the model's
@ -11850,7 +12378,7 @@ components:
description: Type of the step in an agent turn.
const: memory_retrieval
default: memory_retrieval
vector_db_ids:
vector_store_ids:
type: string
description: >-
The IDs of the vector databases to retrieve context from.
@ -11863,7 +12391,7 @@ components:
- turn_id
- step_id
- step_type
- vector_db_ids
- vector_store_ids
- inserted_context
title: MemoryRetrievalStep
description: >-
@ -13460,6 +13988,19 @@ tags:
description: >-
APIs for creating and interacting with agentic systems.
x-displayName: Agents
- name: Batches
    description: >-
      The API is designed to allow use of openai client libraries for seamless integration.

      This API provides the following extensions:
       - idempotent batch creation

      Note: This API is currently under active development and may undergo changes.
x-displayName: >-
The Batches API enables efficient processing of multiple requests in a single
operation, particularly useful for processing large datasets, batch evaluation
workflows, and cost-effective inference at scale.
- name: Benchmarks
description: ''
- name: Conversations
@ -13534,6 +14075,7 @@ x-tagGroups:
- name: Operations
tags:
- Agents
- Batches
- Benchmarks
- Conversations
- DatasetIO

View file

@ -58,13 +58,21 @@ storage:
sql_default:
type: sql_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/sqlstore.db
references:
stores:
metadata:
backend: kv_default
namespace: registry
inference:
backend: sql_default
table_name: inference_store
max_write_queue_size: 10000
num_writers: 4
conversations:
backend: sql_default
table_name: openai_conversations
prompts:
backend: kv_default
namespace: prompts
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}

View file

@ -113,13 +113,21 @@ data:
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
references:
stores:
metadata:
backend: kv_default
namespace: registry
inference:
backend: sql_default
table_name: inference_store
max_write_queue_size: 10000
num_writers: 4
conversations:
backend: sql_default
table_name: openai_conversations
prompts:
backend: kv_default
namespace: prompts
models:
- metadata:
embedding_dimension: 768

View file

@ -106,6 +106,9 @@ storage:
conversations:
table_name: openai_conversations
backend: sql_default
prompts:
namespace: prompts
backend: kv_default
registered_resources:
models:
- metadata:

View file

@ -79,6 +79,33 @@ docker run \
--port $LLAMA_STACK_PORT
```
### Via Docker with Custom Run Configuration
You can also run the Docker container with a custom run configuration file by mounting it into the container:
```bash
# Set the path to your custom run.yaml file
CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
LLAMA_STACK_PORT=8321
docker run \
-it \
--pull always \
  --gpus all \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ~/.llama:/root/.llama \
-v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
-e RUN_CONFIG_PATH=/app/custom-run.yaml \
llamastack/distribution-meta-reference-gpu \
--port $LLAMA_STACK_PORT
```
**Note**: The run configuration must be mounted into the container before it can be used. The `-v` flag mounts your local file into the container, and the `RUN_CONFIG_PATH` environment variable tells the entrypoint script which configuration to use.
Available run configurations for this distribution:
- `run.yaml`
- `run-with-safety.yaml`
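Once the container is up, you can smoke-test it from Python. This is a hypothetical sketch, assuming the default port mapping shown above and a generated client whose `models.list` mirrors the `/v1/models` endpoint; exact signatures may differ:
```python
# Hypothetical smoke test: list the models served by the running stack.
# Assumes the container above is up and exposing port 8321.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

for model in client.models.list():
    print(model.identifier)
```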
### Via venv
Make sure you have the Llama Stack CLI available.

View file

@ -127,13 +127,39 @@ docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ./run.yaml:/root/my-run.yaml \
-v ~/.llama:/root/.llama \
-e NVIDIA_API_KEY=$NVIDIA_API_KEY \
llamastack/distribution-nvidia \
--config /root/my-run.yaml \
--port $LLAMA_STACK_PORT
```
### Via Docker with Custom Run Configuration
You can also run the Docker container with a custom run configuration file by mounting it into the container:
```bash
# Set the path to your custom run.yaml file
CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
LLAMA_STACK_PORT=8321
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ~/.llama:/root/.llama \
-v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
-e RUN_CONFIG_PATH=/app/custom-run.yaml \
-e NVIDIA_API_KEY=$NVIDIA_API_KEY \
llamastack/distribution-nvidia \
--port $LLAMA_STACK_PORT
```
**Note**: The run configuration must be mounted into the container before it can be used. The `-v` flag mounts your local file into the container, and the `RUN_CONFIG_PATH` environment variable tells the entrypoint script which configuration to use.
Available run configurations for this distribution:
- `run.yaml`
- `run-with-safety.yaml`
### Via venv
If you've set up your local development environment, you can also install the distribution dependencies using your local virtual environment.

View file

@ -0,0 +1,27 @@
---
description: "OpenAI Files API provider for managing files through OpenAI's native file storage service."
sidebar_label: Remote - Openai
title: remote::openai
---
# remote::openai
## Description
OpenAI Files API provider for managing files through OpenAI's native file storage service.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `<class 'str'>` | No | | OpenAI API key for authentication |
| `metadata_store` | `<class 'llama_stack.core.storage.datatypes.SqlStoreReference'>` | No | | SQL store configuration for file metadata |
## Sample Configuration
```yaml
api_key: ${env.OPENAI_API_KEY}
metadata_store:
table_name: openai_files_metadata
backend: sql_default
```
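Because this provider simply fronts OpenAI's file storage, files uploaded through it can feed the OpenAI-compatible Batches API added in this release. The following is an illustrative sketch only, assuming a Llama Stack server at `http://localhost:8321` whose generated client exposes `files` and `batches` resources matching those endpoints; exact method signatures may differ:
```python
# Illustrative sketch: upload a JSONL file of requests, then start a
# batch over it. Method names follow the files/batches endpoints in this
# release; exact signatures are assumptions.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

# The remote::openai provider stores the upload in OpenAI's file storage.
with open("requests.jsonl", "rb") as f:
    batch_input = client.files.create(file=f, purpose="batch")

# CreateBatchRequest requires input_file_id, endpoint, and the constant
# completion_window "24h"; idempotency_key is an optional extension.
batch = client.batches.create(
    input_file_id=batch_input.id,
    endpoint="/v1/chat/completions",
    completion_window="24h",
    idempotency_key="nightly-eval-001",  # hypothetical key
)
print(batch.id, batch.status)  # a new batch starts in "validating"
```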

View file

@ -20,6 +20,7 @@ NVIDIA inference provider for accessing NVIDIA NIM models and AI services.
| `url` | `<class 'str'>` | No | https://integrate.api.nvidia.com | A base url for accessing the NVIDIA NIM |
| `timeout` | `<class 'int'>` | No | 60 | Timeout for the HTTP requests |
| `append_api_version` | `<class 'bool'>` | No | True | When set to false, the API version will not be appended to the base_url. By default, it is true. |
| `rerank_model_to_url` | `dict[str, str]` | No | `{'nv-rerank-qa-mistral-4b:1': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking', 'nvidia/nv-rerankqa-mistral-4b-v3': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking', 'nvidia/llama-3.2-nv-rerankqa-1b-v2': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking'}` | Mapping of rerank model identifiers to their API endpoints. |
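These endpoints back the alpha rerank route (`post /v1alpha/inference/rerank` in the client config earlier in this commit). A minimal, hypothetical sketch of calling it through the generated client; the parameter and response field names (`query`, `items`, `max_num_results`, `relevance_score`) are assumptions and may differ:
```python
# Hypothetical sketch: rerank candidate passages with a NIM rerank model.
# Assumes a Llama Stack server at http://localhost:8321 with this
# provider configured; exact signatures may differ.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

result = client.alpha.inference.rerank(
    model="nvidia/llama-3.2-nv-rerankqa-1b-v2",  # one of the mapped models above
    query="How does the scheduler assign GPU streams?",
    items=[
        "Doc A: CPU pinning and NUMA basics.",
        "Doc B: Stream assignment in the GPU scheduler.",
        "Doc C: Filesystem caching strategies.",
    ],
    max_num_results=2,
)

# Most relevant passages first; each entry keeps its original index.
for entry in result.data:
    print(entry.index, entry.relevance_score)
```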
## Sample Configuration

View file

@ -72,14 +72,14 @@ description: |
Example with hybrid search:
```python
response = await vector_io.query_chunks(
vector_db_id="my_db",
vector_store_id="my_db",
query="your query here",
params={"mode": "hybrid", "max_chunks": 3, "score_threshold": 0.7},
)
# Using RRF ranker
response = await vector_io.query_chunks(
vector_db_id="my_db",
vector_store_id="my_db",
query="your query here",
params={
"mode": "hybrid",
@ -91,7 +91,7 @@ description: |
# Using weighted ranker
response = await vector_io.query_chunks(
vector_db_id="my_db",
vector_store_id="my_db",
query="your query here",
params={
"mode": "hybrid",
@ -105,7 +105,7 @@ description: |
Example with explicit vector search:
```python
response = await vector_io.query_chunks(
vector_db_id="my_db",
vector_store_id="my_db",
query="your query here",
params={"mode": "vector", "max_chunks": 3, "score_threshold": 0.7},
)
@ -114,7 +114,7 @@ description: |
Example with keyword search:
```python
response = await vector_io.query_chunks(
vector_db_id="my_db",
vector_store_id="my_db",
query="your query here",
params={"mode": "keyword", "max_chunks": 3, "score_threshold": 0.7},
)
@ -277,14 +277,14 @@ The SQLite-vec provider supports three search modes:
Example with hybrid search:
```python
response = await vector_io.query_chunks(
vector_db_id="my_db",
vector_store_id="my_db",
query="your query here",
params={"mode": "hybrid", "max_chunks": 3, "score_threshold": 0.7},
)
# Using RRF ranker
response = await vector_io.query_chunks(
vector_db_id="my_db",
vector_store_id="my_db",
query="your query here",
params={
"mode": "hybrid",
@ -296,7 +296,7 @@ response = await vector_io.query_chunks(
# Using weighted ranker
response = await vector_io.query_chunks(
vector_db_id="my_db",
vector_store_id="my_db",
query="your query here",
params={
"mode": "hybrid",
@ -310,7 +310,7 @@ response = await vector_io.query_chunks(
Example with explicit vector search:
```python
response = await vector_io.query_chunks(
vector_db_id="my_db",
vector_store_id="my_db",
query="your query here",
params={"mode": "vector", "max_chunks": 3, "score_threshold": 0.7},
)
@ -319,7 +319,7 @@ response = await vector_io.query_chunks(
Example with keyword search:
```python
response = await vector_io.query_chunks(
vector_db_id="my_db",
vector_store_id="my_db",
query="your query here",
params={"mode": "keyword", "max_chunks": 3, "score_threshold": 0.7},
)

File diff suppressed because it is too large

View file

@ -242,15 +242,6 @@ const sidebars: SidebarsConfig = {
'providers/eval/remote_nvidia'
],
},
{
type: 'category',
label: 'Telemetry',
collapsed: true,
items: [
'providers/telemetry/index',
'providers/telemetry/inline_meta-reference'
],
},
{
type: 'category',
label: 'Batches',

View file

@ -1414,6 +1414,193 @@
"deprecated": true
}
},
"/v1/openai/v1/batches": {
"get": {
"responses": {
"200": {
"description": "A list of batch objects.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ListBatchesResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "List all batches for the current user.",
"description": "List all batches for the current user.",
"parameters": [
{
"name": "after",
"in": "query",
"description": "A cursor for pagination; returns batches after this batch ID.",
"required": false,
"schema": {
"type": "string"
}
},
{
"name": "limit",
"in": "query",
"description": "Number of batches to return (default 20, max 100).",
"required": true,
"schema": {
"type": "integer"
}
}
],
"deprecated": true
},
"post": {
"responses": {
"200": {
"description": "The created batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Create a new batch for processing multiple API requests.",
"description": "Create a new batch for processing multiple API requests.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CreateBatchRequest"
}
}
},
"required": true
},
"deprecated": true
}
},
"/v1/openai/v1/batches/{batch_id}": {
"get": {
"responses": {
"200": {
"description": "The batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Retrieve information about a specific batch.",
"description": "Retrieve information about a specific batch.",
"parameters": [
{
"name": "batch_id",
"in": "path",
"description": "The ID of the batch to retrieve.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": true
}
},
"/v1/openai/v1/batches/{batch_id}/cancel": {
"post": {
"responses": {
"200": {
"description": "The updated batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Cancel a batch that is in progress.",
"description": "Cancel a batch that is in progress.",
"parameters": [
{
"name": "batch_id",
"in": "path",
"description": "The ID of the batch to cancel.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": true
}
},
"/v1/openai/v1/chat/completions": {
"get": {
"responses": {
@ -3901,7 +4088,6 @@
},
"max_tokens": {
"type": "integer",
"default": 0,
"description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
},
"repetition_penalty": {
@ -4391,7 +4577,7 @@
"const": "memory_retrieval",
"default": "memory_retrieval"
},
"vector_db_ids": {
"vector_store_ids": {
"type": "string",
"description": "The IDs of the vector databases to retrieve context from."
},
@ -4405,7 +4591,7 @@
"turn_id",
"step_id",
"step_type",
"vector_db_ids",
"vector_store_ids",
"inserted_context"
],
"title": "MemoryRetrievalStep",
@ -6402,6 +6588,451 @@
"title": "Job",
"description": "A job execution instance with status tracking."
},
"ListBatchesResponse": {
"type": "object",
"properties": {
"object": {
"type": "string",
"const": "list",
"default": "list"
},
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"completion_window": {
"type": "string"
},
"created_at": {
"type": "integer"
},
"endpoint": {
"type": "string"
},
"input_file_id": {
"type": "string"
},
"object": {
"type": "string",
"const": "batch"
},
"status": {
"type": "string",
"enum": [
"validating",
"failed",
"in_progress",
"finalizing",
"completed",
"expired",
"cancelling",
"cancelled"
]
},
"cancelled_at": {
"type": "integer"
},
"cancelling_at": {
"type": "integer"
},
"completed_at": {
"type": "integer"
},
"error_file_id": {
"type": "string"
},
"errors": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string"
},
"line": {
"type": "integer"
},
"message": {
"type": "string"
},
"param": {
"type": "string"
}
},
"additionalProperties": false,
"title": "BatchError"
}
},
"object": {
"type": "string"
}
},
"additionalProperties": false,
"title": "Errors"
},
"expired_at": {
"type": "integer"
},
"expires_at": {
"type": "integer"
},
"failed_at": {
"type": "integer"
},
"finalizing_at": {
"type": "integer"
},
"in_progress_at": {
"type": "integer"
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"model": {
"type": "string"
},
"output_file_id": {
"type": "string"
},
"request_counts": {
"type": "object",
"properties": {
"completed": {
"type": "integer"
},
"failed": {
"type": "integer"
},
"total": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"completed",
"failed",
"total"
],
"title": "BatchRequestCounts"
},
"usage": {
"type": "object",
"properties": {
"input_tokens": {
"type": "integer"
},
"input_tokens_details": {
"type": "object",
"properties": {
"cached_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"cached_tokens"
],
"title": "InputTokensDetails"
},
"output_tokens": {
"type": "integer"
},
"output_tokens_details": {
"type": "object",
"properties": {
"reasoning_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"reasoning_tokens"
],
"title": "OutputTokensDetails"
},
"total_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"input_tokens",
"input_tokens_details",
"output_tokens",
"output_tokens_details",
"total_tokens"
],
"title": "BatchUsage"
}
},
"additionalProperties": false,
"required": [
"id",
"completion_window",
"created_at",
"endpoint",
"input_file_id",
"object",
"status"
],
"title": "Batch"
}
},
"first_id": {
"type": "string"
},
"last_id": {
"type": "string"
},
"has_more": {
"type": "boolean",
"default": false
}
},
"additionalProperties": false,
"required": [
"object",
"data",
"has_more"
],
"title": "ListBatchesResponse",
"description": "Response containing a list of batch objects."
},
"CreateBatchRequest": {
"type": "object",
"properties": {
"input_file_id": {
"type": "string",
"description": "The ID of an uploaded file containing requests for the batch."
},
"endpoint": {
"type": "string",
"description": "The endpoint to be used for all requests in the batch."
},
"completion_window": {
"type": "string",
"const": "24h",
"description": "The time window within which the batch should be processed."
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
},
"description": "Optional metadata for the batch."
},
"idempotency_key": {
"type": "string",
"description": "Optional idempotency key. When provided, enables idempotent behavior."
}
},
"additionalProperties": false,
"required": [
"input_file_id",
"endpoint",
"completion_window"
],
"title": "CreateBatchRequest"
},
"Batch": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"completion_window": {
"type": "string"
},
"created_at": {
"type": "integer"
},
"endpoint": {
"type": "string"
},
"input_file_id": {
"type": "string"
},
"object": {
"type": "string",
"const": "batch"
},
"status": {
"type": "string",
"enum": [
"validating",
"failed",
"in_progress",
"finalizing",
"completed",
"expired",
"cancelling",
"cancelled"
]
},
"cancelled_at": {
"type": "integer"
},
"cancelling_at": {
"type": "integer"
},
"completed_at": {
"type": "integer"
},
"error_file_id": {
"type": "string"
},
"errors": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string"
},
"line": {
"type": "integer"
},
"message": {
"type": "string"
},
"param": {
"type": "string"
}
},
"additionalProperties": false,
"title": "BatchError"
}
},
"object": {
"type": "string"
}
},
"additionalProperties": false,
"title": "Errors"
},
"expired_at": {
"type": "integer"
},
"expires_at": {
"type": "integer"
},
"failed_at": {
"type": "integer"
},
"finalizing_at": {
"type": "integer"
},
"in_progress_at": {
"type": "integer"
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"model": {
"type": "string"
},
"output_file_id": {
"type": "string"
},
"request_counts": {
"type": "object",
"properties": {
"completed": {
"type": "integer"
},
"failed": {
"type": "integer"
},
"total": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"completed",
"failed",
"total"
],
"title": "BatchRequestCounts"
},
"usage": {
"type": "object",
"properties": {
"input_tokens": {
"type": "integer"
},
"input_tokens_details": {
"type": "object",
"properties": {
"cached_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"cached_tokens"
],
"title": "InputTokensDetails"
},
"output_tokens": {
"type": "integer"
},
"output_tokens_details": {
"type": "object",
"properties": {
"reasoning_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"reasoning_tokens"
],
"title": "OutputTokensDetails"
},
"total_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"input_tokens",
"input_tokens_details",
"output_tokens",
"output_tokens_details",
"total_tokens"
],
"title": "BatchUsage"
}
},
"additionalProperties": false,
"required": [
"id",
"completion_window",
"created_at",
"endpoint",
"input_file_id",
"object",
"status"
],
"title": "Batch"
},
"Order": {
"type": "string",
"enum": [
@ -8527,29 +9158,14 @@
"OpenAIResponseInput": {
"oneOf": [
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
},
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall"
},
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall"
"$ref": "#/components/schemas/OpenAIResponseOutput"
},
{
"$ref": "#/components/schemas/OpenAIResponseInputFunctionToolCallOutput"
},
{
"$ref": "#/components/schemas/OpenAIResponseMCPApprovalRequest"
},
{
"$ref": "#/components/schemas/OpenAIResponseMCPApprovalResponse"
},
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPCall"
},
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools"
},
{
"$ref": "#/components/schemas/OpenAIResponseMessage"
}
@ -8592,16 +9208,53 @@
},
{
"$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage"
},
{
"$ref": "#/components/schemas/OpenAIResponseInputMessageContentFile"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"input_text": "#/components/schemas/OpenAIResponseInputMessageContentText",
"input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage"
"input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage",
"input_file": "#/components/schemas/OpenAIResponseInputMessageContentFile"
}
}
},
"OpenAIResponseInputMessageContentFile": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "input_file",
"default": "input_file",
"description": "The type of the input item. Always `input_file`."
},
"file_data": {
"type": "string",
"description": "The data of the file to be sent to the model."
},
"file_id": {
"type": "string",
"description": "(Optional) The ID of the file to be sent to the model."
},
"file_url": {
"type": "string",
"description": "The URL of the file to be sent to the model."
},
"filename": {
"type": "string",
"description": "The name of the file to be sent to the model."
}
},
"additionalProperties": false,
"required": [
"type"
],
"title": "OpenAIResponseInputMessageContentFile",
"description": "File content for input messages in OpenAI response format."
},
"OpenAIResponseInputMessageContentImage": {
"type": "object",
"properties": {
@ -8629,6 +9282,10 @@
"default": "input_image",
"description": "Content type identifier, always \"input_image\""
},
"file_id": {
"type": "string",
"description": "(Optional) The ID of the file to be sent to the model."
},
"image_url": {
"type": "string",
"description": "(Optional) URL of the image content"
@ -8992,6 +9649,10 @@
"type": "string",
"description": "(Optional) ID of the previous response in a conversation"
},
"prompt": {
"$ref": "#/components/schemas/OpenAIResponsePrompt",
"description": "(Optional) Reference to a prompt template and its variables."
},
"status": {
"type": "string",
"description": "Current status of the response generation"
@ -9416,6 +10077,32 @@
"title": "OpenAIResponseOutputMessageWebSearchToolCall",
"description": "Web search tool call output message for OpenAI responses."
},
"OpenAIResponsePrompt": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "Unique identifier of the prompt template"
},
"variables": {
"type": "object",
"additionalProperties": {
"$ref": "#/components/schemas/OpenAIResponseInputMessageContent"
},
"description": "Dictionary of variable names to OpenAIResponseInputMessageContent structure for template substitution. The substitution values can either be strings, or other Response input types like images or files."
},
"version": {
"type": "string",
"description": "Version number of the prompt to use (defaults to latest if not specified)"
}
},
"additionalProperties": false,
"required": [
"id"
],
"title": "OpenAIResponsePrompt",
"description": "OpenAI compatible Prompt object that is used in OpenAI responses."
},
"OpenAIResponseText": {
"type": "object",
"properties": {
@ -9786,6 +10473,10 @@
"type": "string",
"description": "The underlying LLM used for completions."
},
"prompt": {
"$ref": "#/components/schemas/OpenAIResponsePrompt",
"description": "(Optional) Prompt object with ID, version, and variables."
},
"instructions": {
"type": "string"
},
@ -9874,6 +10565,10 @@
"type": "string",
"description": "(Optional) ID of the previous response in a conversation"
},
"prompt": {
"$ref": "#/components/schemas/OpenAIResponsePrompt",
"description": "(Optional) Reference to a prompt template and its variables."
},
"status": {
"type": "string",
"description": "Current status of the response generation"
@ -13442,6 +14137,11 @@
"description": "APIs for creating and interacting with agentic systems.\n\n## Deprecated APIs\n\n> **⚠️ DEPRECATED**: These APIs are provided for migration reference and will be removed in future versions. Not recommended for new projects.\n\n### Migration Guidance\n\nIf you are using deprecated versions of the Agents or Responses APIs, please migrate to:\n\n- **Responses API**: Use the stable v1 Responses API endpoints\n",
"x-displayName": "Agents"
},
{
"name": "Batches",
"description": "The API is designed to allow use of openai client libraries for seamless integration.\n\nThis API provides the following extensions:\n - idempotent batch creation\n\nNote: This API is currently under active development and may undergo changes.",
"x-displayName": "The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale."
},
{
"name": "Benchmarks",
"description": ""
@ -13492,6 +14192,7 @@
"name": "Operations",
"tags": [
"Agents",
"Batches",
"Benchmarks",
"DatasetIO",
"Datasets",

View file

@ -1012,6 +1012,141 @@ paths:
schema:
type: string
deprecated: true
/v1/openai/v1/batches:
get:
responses:
'200':
description: A list of batch objects.
content:
application/json:
schema:
$ref: '#/components/schemas/ListBatchesResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: List all batches for the current user.
description: List all batches for the current user.
parameters:
- name: after
in: query
description: >-
A cursor for pagination; returns batches after this batch ID.
required: false
schema:
type: string
- name: limit
in: query
description: >-
Number of batches to return (default 20, max 100).
        required: false
schema:
type: integer
deprecated: true
post:
responses:
'200':
description: The created batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Create a new batch for processing multiple API requests.
description: >-
Create a new batch for processing multiple API requests.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CreateBatchRequest'
required: true
deprecated: true
/v1/openai/v1/batches/{batch_id}:
get:
responses:
'200':
description: The batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Retrieve information about a specific batch.
description: >-
Retrieve information about a specific batch.
parameters:
- name: batch_id
in: path
description: The ID of the batch to retrieve.
required: true
schema:
type: string
deprecated: true
/v1/openai/v1/batches/{batch_id}/cancel:
post:
responses:
'200':
description: The updated batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: Cancel a batch that is in progress.
description: Cancel a batch that is in progress.
parameters:
- name: batch_id
in: path
description: The ID of the batch to cancel.
required: true
schema:
type: string
deprecated: true
/v1/openai/v1/chat/completions:
get:
responses:
@ -2862,7 +2997,6 @@ components:
description: The sampling strategy.
max_tokens:
type: integer
default: 0
description: >-
The maximum number of tokens that can be generated in the completion.
The token count of your prompt plus max_tokens cannot exceed the model's
@ -3253,7 +3387,7 @@ components:
description: Type of the step in an agent turn.
const: memory_retrieval
default: memory_retrieval
vector_db_ids:
vector_store_ids:
type: string
description: >-
The IDs of the vector databases to retrieve context from.
@ -3266,7 +3400,7 @@ components:
- turn_id
- step_id
- step_type
- vector_db_ids
- vector_store_ids
- inserted_context
title: MemoryRetrievalStep
description: >-
@ -4737,6 +4871,331 @@ components:
title: Job
description: >-
A job execution instance with status tracking.
ListBatchesResponse:
type: object
properties:
object:
type: string
const: list
default: list
data:
type: array
items:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
first_id:
type: string
last_id:
type: string
has_more:
type: boolean
default: false
additionalProperties: false
required:
- object
- data
- has_more
title: ListBatchesResponse
description: >-
Response containing a list of batch objects.
CreateBatchRequest:
type: object
properties:
input_file_id:
type: string
description: >-
The ID of an uploaded file containing requests for the batch.
endpoint:
type: string
description: >-
The endpoint to be used for all requests in the batch.
completion_window:
type: string
const: 24h
description: >-
The time window within which the batch should be processed.
metadata:
type: object
additionalProperties:
type: string
description: Optional metadata for the batch.
idempotency_key:
type: string
description: >-
Optional idempotency key. When provided, enables idempotent behavior.
additionalProperties: false
required:
- input_file_id
- endpoint
- completion_window
title: CreateBatchRequest
Batch:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
Order:
type: string
enum:
@ -6370,14 +6829,9 @@ components:
Error details for failed OpenAI response requests.
OpenAIResponseInput:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
- $ref: '#/components/schemas/OpenAIResponseOutput'
- $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
- $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
- $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
- $ref: '#/components/schemas/OpenAIResponseMessage'
"OpenAIResponseInputFunctionToolCallOutput":
type: object
@ -6408,11 +6862,44 @@ components:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
- $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage'
- $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile'
discriminator:
propertyName: type
mapping:
input_text: '#/components/schemas/OpenAIResponseInputMessageContentText'
input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage'
input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile'
OpenAIResponseInputMessageContentFile:
type: object
properties:
type:
type: string
const: input_file
default: input_file
description: >-
The type of the input item. Always `input_file`.
file_data:
type: string
description: >-
          (Optional) The data of the file to be sent to the model.
file_id:
type: string
description: >-
(Optional) The ID of the file to be sent to the model.
file_url:
type: string
description: >-
          (Optional) The URL of the file to be sent to the model.
filename:
type: string
description: >-
          (Optional) The name of the file to be sent to the model.
additionalProperties: false
required:
- type
title: OpenAIResponseInputMessageContentFile
description: >-
File content for input messages in OpenAI response format.
OpenAIResponseInputMessageContentImage:
type: object
properties:
@ -6433,6 +6920,10 @@ components:
default: input_image
description: >-
Content type identifier, always "input_image"
file_id:
type: string
description: >-
(Optional) The ID of the file to be sent to the model.
image_url:
type: string
description: (Optional) URL of the image content
@ -6703,6 +7194,10 @@ components:
type: string
description: >-
(Optional) ID of the previous response in a conversation
prompt:
$ref: '#/components/schemas/OpenAIResponsePrompt'
description: >-
(Optional) Reference to a prompt template and its variables.
status:
type: string
description: >-
@ -7042,6 +7537,30 @@ components:
OpenAIResponseOutputMessageWebSearchToolCall
description: >-
Web search tool call output message for OpenAI responses.
OpenAIResponsePrompt:
type: object
properties:
id:
type: string
description: Unique identifier of the prompt template
variables:
type: object
additionalProperties:
$ref: '#/components/schemas/OpenAIResponseInputMessageContent'
description: >-
          Dictionary mapping variable names to OpenAIResponseInputMessageContent
          structures for template substitution. The substitution values can be
          either strings or other Response input types, such as images or files.
version:
type: string
description: >-
Version number of the prompt to use (defaults to latest if not specified)
additionalProperties: false
required:
- id
title: OpenAIResponsePrompt
description: >-
OpenAI compatible Prompt object that is used in OpenAI responses.
OpenAIResponseText:
type: object
properties:
@ -7299,6 +7818,10 @@ components:
model:
type: string
description: The underlying LLM used for completions.
prompt:
$ref: '#/components/schemas/OpenAIResponsePrompt'
description: >-
(Optional) Prompt object with ID, version, and variables.
instructions:
type: string
previous_response_id:
@ -7376,6 +7899,10 @@ components:
type: string
description: >-
(Optional) ID of the previous response in a conversation
prompt:
$ref: '#/components/schemas/OpenAIResponsePrompt'
description: >-
(Optional) Reference to a prompt template and its variables.
status:
type: string
description: >-
@ -10196,6 +10723,19 @@ tags:
- **Responses API**: Use the stable v1 Responses API endpoints
x-displayName: Agents
- name: Batches
    description: >-
      The API is designed to allow use of openai client libraries for seamless integration.

      This API provides the following extensions:
       - idempotent batch creation

      Note: This API is currently under active development and may undergo changes.
x-displayName: >-
The Batches API enables efficient processing of multiple requests in a single
operation, particularly useful for processing large datasets, batch evaluation
workflows, and cost-effective inference at scale.
- name: Benchmarks
description: ''
- name: DatasetIO
@ -10241,6 +10781,7 @@ x-tagGroups:
- name: Operations
tags:
- Agents
- Batches
- Benchmarks
- DatasetIO
- Datasets

View file

@ -2376,7 +2376,6 @@
},
"max_tokens": {
"type": "integer",
"default": 0,
"description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
},
"repetition_penalty": {
@ -2866,7 +2865,7 @@
"const": "memory_retrieval",
"default": "memory_retrieval"
},
"vector_db_ids": {
"vector_store_ids": {
"type": "string",
"description": "The IDs of the vector databases to retrieve context from."
},
@ -2880,7 +2879,7 @@
"turn_id",
"step_id",
"step_type",
"vector_db_ids",
"vector_store_ids",
"inserted_context"
],
"title": "MemoryRetrievalStep",

View file

@ -1695,7 +1695,6 @@ components:
description: The sampling strategy.
max_tokens:
type: integer
default: 0
description: >-
The maximum number of tokens that can be generated in the completion.
The token count of your prompt plus max_tokens cannot exceed the model's
@ -2086,7 +2085,7 @@ components:
description: Type of the step in an agent turn.
const: memory_retrieval
default: memory_retrieval
vector_db_ids:
vector_store_ids:
type: string
description: >-
The IDs of the vector databases to retrieve context from.
@ -2099,7 +2098,7 @@ components:
- turn_id
- step_id
- step_type
- vector_db_ids
- vector_store_ids
- inserted_context
title: MemoryRetrievalStep
description: >-

View file

@ -40,6 +40,193 @@
}
],
"paths": {
"/v1/batches": {
"get": {
"responses": {
"200": {
"description": "A list of batch objects.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ListBatchesResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "List all batches for the current user.",
"description": "List all batches for the current user.",
"parameters": [
{
"name": "after",
"in": "query",
"description": "A cursor for pagination; returns batches after this batch ID.",
"required": false,
"schema": {
"type": "string"
}
},
{
"name": "limit",
"in": "query",
"description": "Number of batches to return (default 20, max 100).",
"required": true,
"schema": {
"type": "integer"
}
}
],
"deprecated": false
},
"post": {
"responses": {
"200": {
"description": "The created batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Create a new batch for processing multiple API requests.",
"description": "Create a new batch for processing multiple API requests.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CreateBatchRequest"
}
}
},
"required": true
},
"deprecated": false
}
},
"/v1/batches/{batch_id}": {
"get": {
"responses": {
"200": {
"description": "The batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Retrieve information about a specific batch.",
"description": "Retrieve information about a specific batch.",
"parameters": [
{
"name": "batch_id",
"in": "path",
"description": "The ID of the batch to retrieve.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": false
}
},
"/v1/batches/{batch_id}/cancel": {
"post": {
"responses": {
"200": {
"description": "The updated batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Cancel a batch that is in progress.",
"description": "Cancel a batch that is in progress.",
"parameters": [
{
"name": "batch_id",
"in": "path",
"description": "The ID of the batch to cancel.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": false
}
},
"/v1/chat/completions": {
"get": {
"responses": {
@ -4005,6 +4192,451 @@
"title": "Error",
"description": "Error response from the API. Roughly follows RFC 7807."
},
"ListBatchesResponse": {
"type": "object",
"properties": {
"object": {
"type": "string",
"const": "list",
"default": "list"
},
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"completion_window": {
"type": "string"
},
"created_at": {
"type": "integer"
},
"endpoint": {
"type": "string"
},
"input_file_id": {
"type": "string"
},
"object": {
"type": "string",
"const": "batch"
},
"status": {
"type": "string",
"enum": [
"validating",
"failed",
"in_progress",
"finalizing",
"completed",
"expired",
"cancelling",
"cancelled"
]
},
"cancelled_at": {
"type": "integer"
},
"cancelling_at": {
"type": "integer"
},
"completed_at": {
"type": "integer"
},
"error_file_id": {
"type": "string"
},
"errors": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string"
},
"line": {
"type": "integer"
},
"message": {
"type": "string"
},
"param": {
"type": "string"
}
},
"additionalProperties": false,
"title": "BatchError"
}
},
"object": {
"type": "string"
}
},
"additionalProperties": false,
"title": "Errors"
},
"expired_at": {
"type": "integer"
},
"expires_at": {
"type": "integer"
},
"failed_at": {
"type": "integer"
},
"finalizing_at": {
"type": "integer"
},
"in_progress_at": {
"type": "integer"
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"model": {
"type": "string"
},
"output_file_id": {
"type": "string"
},
"request_counts": {
"type": "object",
"properties": {
"completed": {
"type": "integer"
},
"failed": {
"type": "integer"
},
"total": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"completed",
"failed",
"total"
],
"title": "BatchRequestCounts"
},
"usage": {
"type": "object",
"properties": {
"input_tokens": {
"type": "integer"
},
"input_tokens_details": {
"type": "object",
"properties": {
"cached_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"cached_tokens"
],
"title": "InputTokensDetails"
},
"output_tokens": {
"type": "integer"
},
"output_tokens_details": {
"type": "object",
"properties": {
"reasoning_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"reasoning_tokens"
],
"title": "OutputTokensDetails"
},
"total_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"input_tokens",
"input_tokens_details",
"output_tokens",
"output_tokens_details",
"total_tokens"
],
"title": "BatchUsage"
}
},
"additionalProperties": false,
"required": [
"id",
"completion_window",
"created_at",
"endpoint",
"input_file_id",
"object",
"status"
],
"title": "Batch"
}
},
"first_id": {
"type": "string"
},
"last_id": {
"type": "string"
},
"has_more": {
"type": "boolean",
"default": false
}
},
"additionalProperties": false,
"required": [
"object",
"data",
"has_more"
],
"title": "ListBatchesResponse",
"description": "Response containing a list of batch objects."
},
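A sketch of walking this paginated list with plain HTTP, driven by the `last_id` cursor and `has_more` flag defined above; the base URL is assumed.

```python
# Page through /v1/batches using the cursor fields from ListBatchesResponse.
import requests

BASE = "http://localhost:8321/v1"   # assumed server address
params = {"limit": 20}
while True:
    page = requests.get(f"{BASE}/batches", params=params).json()
    for batch in page["data"]:
        print(batch["id"], batch["status"])
    if not page.get("has_more"):
        break
    params["after"] = page["last_id"]   # resume after the last batch seen
```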
"CreateBatchRequest": {
"type": "object",
"properties": {
"input_file_id": {
"type": "string",
"description": "The ID of an uploaded file containing requests for the batch."
},
"endpoint": {
"type": "string",
"description": "The endpoint to be used for all requests in the batch."
},
"completion_window": {
"type": "string",
"const": "24h",
"description": "The time window within which the batch should be processed."
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
},
"description": "Optional metadata for the batch."
},
"idempotency_key": {
"type": "string",
"description": "Optional idempotency key. When provided, enables idempotent behavior."
}
},
"additionalProperties": false,
"required": [
"input_file_id",
"endpoint",
"completion_window"
],
"title": "CreateBatchRequest"
},
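A sketch of producing the `input_file_id` that CreateBatchRequest expects: one JSON-encoded request per line, uploaded through the Files API with `purpose="batch"`. The per-line `custom_id`/`method`/`url`/`body` layout follows the OpenAI batch-input convention, which this schema does not itself define; the model name and questions are illustrative.

```python
import json
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

# One request object per line; custom_id lets results be correlated later.
requests_jsonl = "\n".join(
    json.dumps({
        "custom_id": f"req-{i}",
        "method": "POST",
        "url": "/v1/chat/completions",      # must match the batch endpoint
        "body": {
            "model": "llama3.2:3b",         # hypothetical model ID
            "messages": [{"role": "user", "content": q}],
        },
    })
    for i, q in enumerate(["What is RAG?", "What is a vector store?"])
)

batch_file = client.files.create(
    file=("requests.jsonl", requests_jsonl.encode()),
    purpose="batch",
)
batch = client.batches.create(
    input_file_id=batch_file.id,
    endpoint="/v1/chat/completions",
    completion_window="24h",
)
print(batch.id)
```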
"Batch": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"completion_window": {
"type": "string"
},
"created_at": {
"type": "integer"
},
"endpoint": {
"type": "string"
},
"input_file_id": {
"type": "string"
},
"object": {
"type": "string",
"const": "batch"
},
"status": {
"type": "string",
"enum": [
"validating",
"failed",
"in_progress",
"finalizing",
"completed",
"expired",
"cancelling",
"cancelled"
]
},
"cancelled_at": {
"type": "integer"
},
"cancelling_at": {
"type": "integer"
},
"completed_at": {
"type": "integer"
},
"error_file_id": {
"type": "string"
},
"errors": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string"
},
"line": {
"type": "integer"
},
"message": {
"type": "string"
},
"param": {
"type": "string"
}
},
"additionalProperties": false,
"title": "BatchError"
}
},
"object": {
"type": "string"
}
},
"additionalProperties": false,
"title": "Errors"
},
"expired_at": {
"type": "integer"
},
"expires_at": {
"type": "integer"
},
"failed_at": {
"type": "integer"
},
"finalizing_at": {
"type": "integer"
},
"in_progress_at": {
"type": "integer"
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"model": {
"type": "string"
},
"output_file_id": {
"type": "string"
},
"request_counts": {
"type": "object",
"properties": {
"completed": {
"type": "integer"
},
"failed": {
"type": "integer"
},
"total": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"completed",
"failed",
"total"
],
"title": "BatchRequestCounts"
},
"usage": {
"type": "object",
"properties": {
"input_tokens": {
"type": "integer"
},
"input_tokens_details": {
"type": "object",
"properties": {
"cached_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"cached_tokens"
],
"title": "InputTokensDetails"
},
"output_tokens": {
"type": "integer"
},
"output_tokens_details": {
"type": "object",
"properties": {
"reasoning_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"reasoning_tokens"
],
"title": "OutputTokensDetails"
},
"total_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"input_tokens",
"input_tokens_details",
"output_tokens",
"output_tokens_details",
"total_tokens"
],
"title": "BatchUsage"
}
},
"additionalProperties": false,
"required": [
"id",
"completion_window",
"created_at",
"endpoint",
"input_file_id",
"object",
"status"
],
"title": "Batch"
},
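Since a batch moves through the status values enumerated above, a client typically polls until it reaches a terminal state. A sketch, with arbitrary timing and a hypothetical batch ID:

```python
import time
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

TERMINAL = {"completed", "failed", "expired", "cancelled"}
deadline = time.time() + 3600          # give up after an hour (arbitrary)

batch = client.batches.retrieve("batch_abc123")   # hypothetical batch ID
while batch.status not in TERMINAL:
    if time.time() > deadline:
        # Cancellation is asynchronous: the batch enters "cancelling" first.
        batch = client.batches.cancel(batch.id)
        break
    time.sleep(30)
    batch = client.batches.retrieve(batch.id)

print(batch.status, batch.request_counts)
```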
"Order": {
"type": "string",
"enum": [
@ -5696,16 +6328,53 @@
},
{
"$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage"
},
{
"$ref": "#/components/schemas/OpenAIResponseInputMessageContentFile"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"input_text": "#/components/schemas/OpenAIResponseInputMessageContentText",
"input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage"
"input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage",
"input_file": "#/components/schemas/OpenAIResponseInputMessageContentFile"
}
}
},
"OpenAIResponseInputMessageContentFile": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "input_file",
"default": "input_file",
"description": "The type of the input item. Always `input_file`."
},
"file_data": {
"type": "string",
"description": "The data of the file to be sent to the model."
},
"file_id": {
"type": "string",
"description": "(Optional) The ID of the file to be sent to the model."
},
"file_url": {
"type": "string",
"description": "The URL of the file to be sent to the model."
},
"filename": {
"type": "string",
"description": "The name of the file to be sent to the model."
}
},
"additionalProperties": false,
"required": [
"type"
],
"title": "OpenAIResponseInputMessageContentFile",
"description": "File content for input messages in OpenAI response format."
},
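A sketch of the new `input_file` content part in a user message, assuming the OpenAI Python client against a Llama Stack server. The file ID would come from a prior Files upload; since the schema requires only `type`, which of `file_id`, `file_url`, or `file_data` (with `filename`) to send is the caller's choice.

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

response = client.responses.create(
    model="llama3.2:3b",                          # hypothetical model ID
    input=[{
        "role": "user",
        "content": [
            {"type": "input_text", "text": "Summarize this document."},
            {"type": "input_file", "file_id": "file-abc123"},  # prior upload
        ],
    }],
)
print(response.output_text)
```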
"OpenAIResponseInputMessageContentImage": {
"type": "object",
"properties": {
@ -5733,6 +6402,10 @@
"default": "input_image",
"description": "Content type identifier, always \"input_image\""
},
"file_id": {
"type": "string",
"description": "(Optional) The ID of the file to be sent to the model."
},
"image_url": {
"type": "string",
"description": "(Optional) URL of the image content"
@ -7305,29 +7978,14 @@
"OpenAIResponseInput": {
"oneOf": [
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
},
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall"
},
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall"
"$ref": "#/components/schemas/OpenAIResponseOutput"
},
{
"$ref": "#/components/schemas/OpenAIResponseInputFunctionToolCallOutput"
},
{
"$ref": "#/components/schemas/OpenAIResponseMCPApprovalRequest"
},
{
"$ref": "#/components/schemas/OpenAIResponseMCPApprovalResponse"
},
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPCall"
},
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools"
},
{
"$ref": "#/components/schemas/OpenAIResponseMessage"
}
@ -7536,6 +8194,10 @@
"type": "string",
"description": "(Optional) ID of the previous response in a conversation"
},
"prompt": {
"$ref": "#/components/schemas/OpenAIResponsePrompt",
"description": "(Optional) Reference to a prompt template and its variables."
},
"status": {
"type": "string",
"description": "Current status of the response generation"
@ -7631,6 +8293,32 @@
}
}
},
"OpenAIResponsePrompt": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "Unique identifier of the prompt template"
},
"variables": {
"type": "object",
"additionalProperties": {
"$ref": "#/components/schemas/OpenAIResponseInputMessageContent"
},
"description": "Dictionary of variable names to OpenAIResponseInputMessageContent structure for template substitution. The substitution values can either be strings, or other Response input types like images or files."
},
"version": {
"type": "string",
"description": "Version number of the prompt to use (defaults to latest if not specified)"
}
},
"additionalProperties": false,
"required": [
"id"
],
"title": "OpenAIResponsePrompt",
"description": "OpenAI compatible Prompt object that is used in OpenAI responses."
},
"OpenAIResponseText": {
"type": "object",
"properties": {
@ -8001,6 +8689,10 @@
"type": "string",
"description": "The underlying LLM used for completions."
},
"prompt": {
"$ref": "#/components/schemas/OpenAIResponsePrompt",
"description": "(Optional) Prompt object with ID, version, and variables."
},
"instructions": {
"type": "string"
},
@ -8089,6 +8781,10 @@
"type": "string",
"description": "(Optional) ID of the previous response in a conversation"
},
"prompt": {
"$ref": "#/components/schemas/OpenAIResponsePrompt",
"description": "(Optional) Reference to a prompt template and its variables."
},
"status": {
"type": "string",
"description": "Current status of the response generation"
@ -11427,7 +12123,7 @@
},
"description": "List of documents to index in the RAG system"
},
"vector_db_id": {
"vector_store_id": {
"type": "string",
"description": "ID of the vector database to store the document embeddings"
},
@ -11439,7 +12135,7 @@
"additionalProperties": false,
"required": [
"documents",
"vector_db_id",
"vector_store_id",
"chunk_size_in_tokens"
],
"title": "InsertRequest"
@ -11630,7 +12326,7 @@
"$ref": "#/components/schemas/InterleavedContent",
"description": "The query content to search for in the indexed documents"
},
"vector_db_ids": {
"vector_store_ids": {
"type": "array",
"items": {
"type": "string"
@ -11645,7 +12341,7 @@
"additionalProperties": false,
"required": [
"content",
"vector_db_ids"
"vector_store_ids"
],
"title": "QueryRequest"
},
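A sketch reflecting the rename on the RAG tool runtime surface: `vector_db_id` becomes `vector_store_id` on insert, and `vector_db_ids` becomes `vector_store_ids` on query. Method paths are assumed from the llama-stack-client Python SDK; IDs and content are illustrative.

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

client.tool_runtime.rag_tool.insert(
    documents=[{
        "document_id": "doc-1",                   # illustrative document
        "content": "The Batches API processes many requests in one operation.",
        "metadata": {},
    }],
    vector_store_id="my-store",                   # formerly vector_db_id
    chunk_size_in_tokens=512,
)

result = client.tool_runtime.rag_tool.query(
    content="How do I create a batch?",
    vector_store_ids=["my-store"],                # formerly vector_db_ids
)
print(result.content)
```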
@ -11833,6 +12529,10 @@
"$ref": "#/components/schemas/InterleavedContent",
"description": "The content of the chunk, which can be interleaved text, images, or other types."
},
"chunk_id": {
"type": "string",
"description": "Unique identifier for the chunk. Must be provided explicitly."
},
"metadata": {
"type": "object",
"additionalProperties": {
@ -11866,10 +12566,6 @@
},
"description": "Optional embedding for the chunk. If not provided, it will be computed later."
},
"stored_chunk_id": {
"type": "string",
"description": "The chunk ID that is stored in the vector database. Used for backend functionality."
},
"chunk_metadata": {
"$ref": "#/components/schemas/ChunkMetadata",
"description": "Metadata for the chunk that will NOT be used in the context during inference. The `chunk_metadata` is required backend functionality."
@ -11878,6 +12574,7 @@
"additionalProperties": false,
"required": [
"content",
"chunk_id",
"metadata"
],
"title": "Chunk",
@ -11938,7 +12635,7 @@
"InsertChunksRequest": {
"type": "object",
"properties": {
"vector_db_id": {
"vector_store_id": {
"type": "string",
"description": "The identifier of the vector database to insert the chunks into."
},
@ -11956,7 +12653,7 @@
},
"additionalProperties": false,
"required": [
"vector_db_id",
"vector_store_id",
"chunks"
],
"title": "InsertChunksRequest"
@ -11964,7 +12661,7 @@
"QueryChunksRequest": {
"type": "object",
"properties": {
"vector_db_id": {
"vector_store_id": {
"type": "string",
"description": "The identifier of the vector database to query."
},
@ -12001,7 +12698,7 @@
},
"additionalProperties": false,
"required": [
"vector_db_id",
"vector_store_id",
"query"
],
"title": "QueryChunksRequest"
@ -13224,6 +13921,11 @@
"description": "APIs for creating and interacting with agentic systems.\n\n## Responses API\n\nThe Responses API provides OpenAI-compatible functionality with enhanced capabilities for dynamic, stateful interactions.\n\n> **✅ STABLE**: This API is production-ready with backward compatibility guarantees. Recommended for production applications.\n\n### ✅ Supported Tools\n\nThe Responses API supports the following tool types:\n\n- **`web_search`**: Search the web for current information and real-time data\n- **`file_search`**: Search through uploaded files and vector stores\n - Supports dynamic `vector_store_ids` per call\n - Compatible with OpenAI file search patterns\n- **`function`**: Call custom functions with JSON schema validation\n- **`mcp_tool`**: Model Context Protocol integration\n\n### ✅ Supported Fields & Features\n\n**Core Capabilities:**\n- **Dynamic Configuration**: Switch models, vector stores, and tools per request without pre-configuration\n- **Conversation Branching**: Use `previous_response_id` to branch conversations and explore different paths\n- **Rich Annotations**: Automatic file citations, URL citations, and container file citations\n- **Status Tracking**: Monitor tool call execution status and handle failures gracefully\n\n### 🚧 Work in Progress\n\n- Full real-time response streaming support\n- `tool_choice` parameter\n- `max_tool_calls` parameter\n- Built-in tools (code interpreter, containers API)\n- Safety & guardrails\n- `reasoning` capabilities\n- `service_tier`\n- `logprobs`\n- `max_output_tokens`\n- `metadata` handling\n- `instructions`\n- `incomplete_details`\n- `background`",
"x-displayName": "Agents"
},
{
"name": "Batches",
"description": "The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale.\n\nThe API is designed to allow use of OpenAI client libraries for seamless integration.\n\nThis API provides the following extensions:\n - idempotent batch creation\n\nNote: This API is currently under active development and may undergo changes.",
"x-displayName": "Batches"
},
{
"name": "Conversations",
"description": "Protocol for conversation management operations.",
@ -13297,6 +13999,7 @@
"name": "Operations",
"tags": [
"Agents",
"Batches",
"Conversations",
"Files",
"Inference",

View file

@ -12,6 +12,141 @@ info:
servers:
- url: http://any-hosted-llama-stack.com
paths:
/v1/batches:
get:
responses:
'200':
description: A list of batch objects.
content:
application/json:
schema:
$ref: '#/components/schemas/ListBatchesResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: List all batches for the current user.
description: List all batches for the current user.
parameters:
- name: after
in: query
description: >-
A cursor for pagination; returns batches after this batch ID.
required: false
schema:
type: string
- name: limit
in: query
description: >-
Number of batches to return (default 20, max 100).
required: false
schema:
type: integer
deprecated: false
post:
responses:
'200':
description: The created batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Create a new batch for processing multiple API requests.
description: >-
Create a new batch for processing multiple API requests.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CreateBatchRequest'
required: true
deprecated: false
/v1/batches/{batch_id}:
get:
responses:
'200':
description: The batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Retrieve information about a specific batch.
description: >-
Retrieve information about a specific batch.
parameters:
- name: batch_id
in: path
description: The ID of the batch to retrieve.
required: true
schema:
type: string
deprecated: false
/v1/batches/{batch_id}/cancel:
post:
responses:
'200':
description: The updated batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: Cancel a batch that is in progress.
description: Cancel a batch that is in progress.
parameters:
- name: batch_id
in: path
description: The ID of the batch to cancel.
required: true
schema:
type: string
deprecated: false
/v1/chat/completions:
get:
responses:
@ -2999,6 +3134,331 @@ components:
title: Error
description: >-
Error response from the API. Roughly follows RFC 7807.
ListBatchesResponse:
type: object
properties:
object:
type: string
const: list
default: list
data:
type: array
items:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
first_id:
type: string
last_id:
type: string
has_more:
type: boolean
default: false
additionalProperties: false
required:
- object
- data
- has_more
title: ListBatchesResponse
description: >-
Response containing a list of batch objects.
CreateBatchRequest:
type: object
properties:
input_file_id:
type: string
description: >-
The ID of an uploaded file containing requests for the batch.
endpoint:
type: string
description: >-
The endpoint to be used for all requests in the batch.
completion_window:
type: string
const: 24h
description: >-
The time window within which the batch should be processed.
metadata:
type: object
additionalProperties:
type: string
description: Optional metadata for the batch.
idempotency_key:
type: string
description: >-
Optional idempotency key. When provided, enables idempotent behavior.
additionalProperties: false
required:
- input_file_id
- endpoint
- completion_window
title: CreateBatchRequest
Batch:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
Order:
type: string
enum:
@ -4261,11 +4721,44 @@ components:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
- $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage'
- $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile'
discriminator:
propertyName: type
mapping:
input_text: '#/components/schemas/OpenAIResponseInputMessageContentText'
input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage'
input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile'
OpenAIResponseInputMessageContentFile:
type: object
properties:
type:
type: string
const: input_file
default: input_file
description: >-
The type of the input item. Always `input_file`.
file_data:
type: string
description: >-
(Optional) The data of the file to be sent to the model.
file_id:
type: string
description: >-
(Optional) The ID of the file to be sent to the model.
file_url:
type: string
description: >-
(Optional) The URL of the file to be sent to the model.
filename:
type: string
description: >-
(Optional) The name of the file to be sent to the model.
additionalProperties: false
required:
- type
title: OpenAIResponseInputMessageContentFile
description: >-
File content for input messages in OpenAI response format.
OpenAIResponseInputMessageContentImage:
type: object
properties:
@ -4286,6 +4779,10 @@ components:
default: input_image
description: >-
Content type identifier, always "input_image"
file_id:
type: string
description: >-
(Optional) The ID of the file to be sent to the model.
image_url:
type: string
description: (Optional) URL of the image content
@ -5522,14 +6019,9 @@ components:
Error details for failed OpenAI response requests.
OpenAIResponseInput:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
- $ref: '#/components/schemas/OpenAIResponseOutput'
- $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
- $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
- $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
- $ref: '#/components/schemas/OpenAIResponseMessage'
OpenAIResponseInputToolFileSearch:
type: object
@ -5685,6 +6177,10 @@ components:
type: string
description: >-
(Optional) ID of the previous response in a conversation
prompt:
$ref: '#/components/schemas/OpenAIResponsePrompt'
description: >-
(Optional) Reference to a prompt template and its variables.
status:
type: string
description: >-
@ -5758,6 +6254,30 @@ components:
mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
OpenAIResponsePrompt:
type: object
properties:
id:
type: string
description: Unique identifier of the prompt template
variables:
type: object
additionalProperties:
$ref: '#/components/schemas/OpenAIResponseInputMessageContent'
description: >-
Dictionary of variable names to OpenAIResponseInputMessageContent structure
for template substitution. The substitution values can either be strings,
or other Response input types like images or files.
version:
type: string
description: >-
Version number of the prompt to use (defaults to latest if not specified)
additionalProperties: false
required:
- id
title: OpenAIResponsePrompt
description: >-
OpenAI compatible Prompt object that is used in OpenAI responses.
OpenAIResponseText:
type: object
properties:
@ -6015,6 +6535,10 @@ components:
model:
type: string
description: The underlying LLM used for completions.
prompt:
$ref: '#/components/schemas/OpenAIResponsePrompt'
description: >-
(Optional) Prompt object with ID, version, and variables.
instructions:
type: string
previous_response_id:
@ -6092,6 +6616,10 @@ components:
type: string
description: >-
(Optional) ID of the previous response in a conversation
prompt:
$ref: '#/components/schemas/OpenAIResponsePrompt'
description: >-
(Optional) Reference to a prompt template and its variables.
status:
type: string
description: >-
@ -8654,7 +9182,7 @@ components:
$ref: '#/components/schemas/RAGDocument'
description: >-
List of documents to index in the RAG system
vector_db_id:
vector_store_id:
type: string
description: >-
ID of the vector database to store the document embeddings
@ -8665,7 +9193,7 @@ components:
additionalProperties: false
required:
- documents
- vector_db_id
- vector_store_id
- chunk_size_in_tokens
title: InsertRequest
DefaultRAGQueryGeneratorConfig:
@ -8836,7 +9364,7 @@ components:
$ref: '#/components/schemas/InterleavedContent'
description: >-
The query content to search for in the indexed documents
vector_db_ids:
vector_store_ids:
type: array
items:
type: string
@ -8849,7 +9377,7 @@ components:
additionalProperties: false
required:
- content
- vector_db_ids
- vector_store_ids
title: QueryRequest
RAGQueryResult:
type: object
@ -8977,6 +9505,10 @@ components:
description: >-
The content of the chunk, which can be interleaved text, images, or other
types.
chunk_id:
type: string
description: >-
Unique identifier for the chunk. Must be provided explicitly.
metadata:
type: object
additionalProperties:
@ -8997,10 +9529,6 @@ components:
description: >-
Optional embedding for the chunk. If not provided, it will be computed
later.
stored_chunk_id:
type: string
description: >-
The chunk ID that is stored in the vector database. Used for backend functionality.
chunk_metadata:
$ref: '#/components/schemas/ChunkMetadata'
description: >-
@ -9009,6 +9537,7 @@ components:
additionalProperties: false
required:
- content
- chunk_id
- metadata
title: Chunk
description: >-
@ -9073,7 +9602,7 @@ components:
InsertChunksRequest:
type: object
properties:
vector_db_id:
vector_store_id:
type: string
description: >-
The identifier of the vector database to insert the chunks into.
@ -9092,13 +9621,13 @@ components:
description: The time to live of the chunks.
additionalProperties: false
required:
- vector_db_id
- vector_store_id
- chunks
title: InsertChunksRequest
QueryChunksRequest:
type: object
properties:
vector_db_id:
vector_store_id:
type: string
description: >-
The identifier of the vector database to query.
@ -9118,7 +9647,7 @@ components:
description: The parameters of the query.
additionalProperties: false
required:
- vector_db_id
- vector_store_id
- query
title: QueryChunksRequest
QueryChunksResponse:
@ -10075,6 +10604,19 @@ tags:
- `background`
x-displayName: Agents
- name: Batches
description: >-
The Batches API enables efficient processing of multiple requests in a single
operation, particularly useful for processing large datasets, batch evaluation
workflows, and cost-effective inference at scale.
The API is designed to allow use of OpenAI client libraries for seamless integration.
This API provides the following extensions:
- idempotent batch creation
Note: This API is currently under active development and may undergo changes.
x-displayName: Batches
- name: Conversations
description: >-
Protocol for conversation management operations.
@ -10137,6 +10679,7 @@ x-tagGroups:
- name: Operations
tags:
- Agents
- Batches
- Conversations
- Files
- Inference

View file

@ -40,6 +40,193 @@
}
],
"paths": {
"/v1/batches": {
"get": {
"responses": {
"200": {
"description": "A list of batch objects.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ListBatchesResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "List all batches for the current user.",
"description": "List all batches for the current user.",
"parameters": [
{
"name": "after",
"in": "query",
"description": "A cursor for pagination; returns batches after this batch ID.",
"required": false,
"schema": {
"type": "string"
}
},
{
"name": "limit",
"in": "query",
"description": "Number of batches to return (default 20, max 100).",
"required": true,
"schema": {
"type": "integer"
}
}
],
"deprecated": false
},
"post": {
"responses": {
"200": {
"description": "The created batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Create a new batch for processing multiple API requests.",
"description": "Create a new batch for processing multiple API requests.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CreateBatchRequest"
}
}
},
"required": true
},
"deprecated": false
}
},
"/v1/batches/{batch_id}": {
"get": {
"responses": {
"200": {
"description": "The batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Retrieve information about a specific batch.",
"description": "Retrieve information about a specific batch.",
"parameters": [
{
"name": "batch_id",
"in": "path",
"description": "The ID of the batch to retrieve.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": false
}
},
"/v1/batches/{batch_id}/cancel": {
"post": {
"responses": {
"200": {
"description": "The updated batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Cancel a batch that is in progress.",
"description": "Cancel a batch that is in progress.",
"parameters": [
{
"name": "batch_id",
"in": "path",
"description": "The ID of the batch to cancel.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": false
}
},
"/v1/chat/completions": {
"get": {
"responses": {
@ -5677,6 +5864,451 @@
"title": "Error",
"description": "Error response from the API. Roughly follows RFC 7807."
},
"ListBatchesResponse": {
"type": "object",
"properties": {
"object": {
"type": "string",
"const": "list",
"default": "list"
},
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"completion_window": {
"type": "string"
},
"created_at": {
"type": "integer"
},
"endpoint": {
"type": "string"
},
"input_file_id": {
"type": "string"
},
"object": {
"type": "string",
"const": "batch"
},
"status": {
"type": "string",
"enum": [
"validating",
"failed",
"in_progress",
"finalizing",
"completed",
"expired",
"cancelling",
"cancelled"
]
},
"cancelled_at": {
"type": "integer"
},
"cancelling_at": {
"type": "integer"
},
"completed_at": {
"type": "integer"
},
"error_file_id": {
"type": "string"
},
"errors": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string"
},
"line": {
"type": "integer"
},
"message": {
"type": "string"
},
"param": {
"type": "string"
}
},
"additionalProperties": false,
"title": "BatchError"
}
},
"object": {
"type": "string"
}
},
"additionalProperties": false,
"title": "Errors"
},
"expired_at": {
"type": "integer"
},
"expires_at": {
"type": "integer"
},
"failed_at": {
"type": "integer"
},
"finalizing_at": {
"type": "integer"
},
"in_progress_at": {
"type": "integer"
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"model": {
"type": "string"
},
"output_file_id": {
"type": "string"
},
"request_counts": {
"type": "object",
"properties": {
"completed": {
"type": "integer"
},
"failed": {
"type": "integer"
},
"total": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"completed",
"failed",
"total"
],
"title": "BatchRequestCounts"
},
"usage": {
"type": "object",
"properties": {
"input_tokens": {
"type": "integer"
},
"input_tokens_details": {
"type": "object",
"properties": {
"cached_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"cached_tokens"
],
"title": "InputTokensDetails"
},
"output_tokens": {
"type": "integer"
},
"output_tokens_details": {
"type": "object",
"properties": {
"reasoning_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"reasoning_tokens"
],
"title": "OutputTokensDetails"
},
"total_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"input_tokens",
"input_tokens_details",
"output_tokens",
"output_tokens_details",
"total_tokens"
],
"title": "BatchUsage"
}
},
"additionalProperties": false,
"required": [
"id",
"completion_window",
"created_at",
"endpoint",
"input_file_id",
"object",
"status"
],
"title": "Batch"
}
},
"first_id": {
"type": "string"
},
"last_id": {
"type": "string"
},
"has_more": {
"type": "boolean",
"default": false
}
},
"additionalProperties": false,
"required": [
"object",
"data",
"has_more"
],
"title": "ListBatchesResponse",
"description": "Response containing a list of batch objects."
},
"CreateBatchRequest": {
"type": "object",
"properties": {
"input_file_id": {
"type": "string",
"description": "The ID of an uploaded file containing requests for the batch."
},
"endpoint": {
"type": "string",
"description": "The endpoint to be used for all requests in the batch."
},
"completion_window": {
"type": "string",
"const": "24h",
"description": "The time window within which the batch should be processed."
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
},
"description": "Optional metadata for the batch."
},
"idempotency_key": {
"type": "string",
"description": "Optional idempotency key. When provided, enables idempotent behavior."
}
},
"additionalProperties": false,
"required": [
"input_file_id",
"endpoint",
"completion_window"
],
"title": "CreateBatchRequest"
},
"Batch": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"completion_window": {
"type": "string"
},
"created_at": {
"type": "integer"
},
"endpoint": {
"type": "string"
},
"input_file_id": {
"type": "string"
},
"object": {
"type": "string",
"const": "batch"
},
"status": {
"type": "string",
"enum": [
"validating",
"failed",
"in_progress",
"finalizing",
"completed",
"expired",
"cancelling",
"cancelled"
]
},
"cancelled_at": {
"type": "integer"
},
"cancelling_at": {
"type": "integer"
},
"completed_at": {
"type": "integer"
},
"error_file_id": {
"type": "string"
},
"errors": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string"
},
"line": {
"type": "integer"
},
"message": {
"type": "string"
},
"param": {
"type": "string"
}
},
"additionalProperties": false,
"title": "BatchError"
}
},
"object": {
"type": "string"
}
},
"additionalProperties": false,
"title": "Errors"
},
"expired_at": {
"type": "integer"
},
"expires_at": {
"type": "integer"
},
"failed_at": {
"type": "integer"
},
"finalizing_at": {
"type": "integer"
},
"in_progress_at": {
"type": "integer"
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"model": {
"type": "string"
},
"output_file_id": {
"type": "string"
},
"request_counts": {
"type": "object",
"properties": {
"completed": {
"type": "integer"
},
"failed": {
"type": "integer"
},
"total": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"completed",
"failed",
"total"
],
"title": "BatchRequestCounts"
},
"usage": {
"type": "object",
"properties": {
"input_tokens": {
"type": "integer"
},
"input_tokens_details": {
"type": "object",
"properties": {
"cached_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"cached_tokens"
],
"title": "InputTokensDetails"
},
"output_tokens": {
"type": "integer"
},
"output_tokens_details": {
"type": "object",
"properties": {
"reasoning_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"reasoning_tokens"
],
"title": "OutputTokensDetails"
},
"total_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"input_tokens",
"input_tokens_details",
"output_tokens",
"output_tokens_details",
"total_tokens"
],
"title": "BatchUsage"
}
},
"additionalProperties": false,
"required": [
"id",
"completion_window",
"created_at",
"endpoint",
"input_file_id",
"object",
"status"
],
"title": "Batch"
},
"Order": {
"type": "string",
"enum": [
@ -7368,16 +8000,53 @@
},
{
"$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage"
},
{
"$ref": "#/components/schemas/OpenAIResponseInputMessageContentFile"
}
],
"discriminator": {
"propertyName": "type",
"mapping": {
"input_text": "#/components/schemas/OpenAIResponseInputMessageContentText",
"input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage"
"input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage",
"input_file": "#/components/schemas/OpenAIResponseInputMessageContentFile"
}
}
},
"OpenAIResponseInputMessageContentFile": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "input_file",
"default": "input_file",
"description": "The type of the input item. Always `input_file`."
},
"file_data": {
"type": "string",
"description": "The data of the file to be sent to the model."
},
"file_id": {
"type": "string",
"description": "(Optional) The ID of the file to be sent to the model."
},
"file_url": {
"type": "string",
"description": "The URL of the file to be sent to the model."
},
"filename": {
"type": "string",
"description": "The name of the file to be sent to the model."
}
},
"additionalProperties": false,
"required": [
"type"
],
"title": "OpenAIResponseInputMessageContentFile",
"description": "File content for input messages in OpenAI response format."
},
"OpenAIResponseInputMessageContentImage": {
"type": "object",
"properties": {
@ -7405,6 +8074,10 @@
"default": "input_image",
"description": "Content type identifier, always \"input_image\""
},
"file_id": {
"type": "string",
"description": "(Optional) The ID of the file to be sent to the model."
},
"image_url": {
"type": "string",
"description": "(Optional) URL of the image content"
@ -8977,29 +9650,14 @@
"OpenAIResponseInput": {
"oneOf": [
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
},
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall"
},
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall"
"$ref": "#/components/schemas/OpenAIResponseOutput"
},
{
"$ref": "#/components/schemas/OpenAIResponseInputFunctionToolCallOutput"
},
{
"$ref": "#/components/schemas/OpenAIResponseMCPApprovalRequest"
},
{
"$ref": "#/components/schemas/OpenAIResponseMCPApprovalResponse"
},
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPCall"
},
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools"
},
{
"$ref": "#/components/schemas/OpenAIResponseMessage"
}
@ -9208,6 +9866,10 @@
"type": "string",
"description": "(Optional) ID of the previous response in a conversation"
},
"prompt": {
"$ref": "#/components/schemas/OpenAIResponsePrompt",
"description": "(Optional) Reference to a prompt template and its variables."
},
"status": {
"type": "string",
"description": "Current status of the response generation"
@ -9303,6 +9965,32 @@
}
}
},
"OpenAIResponsePrompt": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "Unique identifier of the prompt template"
},
"variables": {
"type": "object",
"additionalProperties": {
"$ref": "#/components/schemas/OpenAIResponseInputMessageContent"
},
"description": "Dictionary of variable names to OpenAIResponseInputMessageContent structure for template substitution. The substitution values can either be strings, or other Response input types like images or files."
},
"version": {
"type": "string",
"description": "Version number of the prompt to use (defaults to latest if not specified)"
}
},
"additionalProperties": false,
"required": [
"id"
],
"title": "OpenAIResponsePrompt",
"description": "OpenAI compatible Prompt object that is used in OpenAI responses."
},
"OpenAIResponseText": {
"type": "object",
"properties": {
@ -9673,6 +10361,10 @@
"type": "string",
"description": "The underlying LLM used for completions."
},
"prompt": {
"$ref": "#/components/schemas/OpenAIResponsePrompt",
"description": "(Optional) Prompt object with ID, version, and variables."
},
"instructions": {
"type": "string"
},
@ -9761,6 +10453,10 @@
"type": "string",
"description": "(Optional) ID of the previous response in a conversation"
},
"prompt": {
"$ref": "#/components/schemas/OpenAIResponsePrompt",
"description": "(Optional) Reference to a prompt template and its variables."
},
"status": {
"type": "string",
"description": "Current status of the response generation"
@ -13099,7 +13795,7 @@
},
"description": "List of documents to index in the RAG system"
},
"vector_db_id": {
"vector_store_id": {
"type": "string",
"description": "ID of the vector database to store the document embeddings"
},
@ -13111,7 +13807,7 @@
"additionalProperties": false,
"required": [
"documents",
"vector_db_id",
"vector_store_id",
"chunk_size_in_tokens"
],
"title": "InsertRequest"
@ -13302,7 +13998,7 @@
"$ref": "#/components/schemas/InterleavedContent",
"description": "The query content to search for in the indexed documents"
},
"vector_db_ids": {
"vector_store_ids": {
"type": "array",
"items": {
"type": "string"
@ -13317,7 +14013,7 @@
"additionalProperties": false,
"required": [
"content",
"vector_db_ids"
"vector_store_ids"
],
"title": "QueryRequest"
},
@ -13505,6 +14201,10 @@
"$ref": "#/components/schemas/InterleavedContent",
"description": "The content of the chunk, which can be interleaved text, images, or other types."
},
"chunk_id": {
"type": "string",
"description": "Unique identifier for the chunk. Must be provided explicitly."
},
"metadata": {
"type": "object",
"additionalProperties": {
@ -13538,10 +14238,6 @@
},
"description": "Optional embedding for the chunk. If not provided, it will be computed later."
},
"stored_chunk_id": {
"type": "string",
"description": "The chunk ID that is stored in the vector database. Used for backend functionality."
},
"chunk_metadata": {
"$ref": "#/components/schemas/ChunkMetadata",
"description": "Metadata for the chunk that will NOT be used in the context during inference. The `chunk_metadata` is required backend functionality."
@ -13550,6 +14246,7 @@
"additionalProperties": false,
"required": [
"content",
"chunk_id",
"metadata"
],
"title": "Chunk",
@ -13610,7 +14307,7 @@
"InsertChunksRequest": {
"type": "object",
"properties": {
"vector_db_id": {
"vector_store_id": {
"type": "string",
"description": "The identifier of the vector database to insert the chunks into."
},
@ -13628,7 +14325,7 @@
},
"additionalProperties": false,
"required": [
"vector_db_id",
"vector_store_id",
"chunks"
],
"title": "InsertChunksRequest"
@ -13636,7 +14333,7 @@
"QueryChunksRequest": {
"type": "object",
"properties": {
"vector_db_id": {
"vector_store_id": {
"type": "string",
"description": "The identifier of the vector database to query."
},
@ -13673,7 +14370,7 @@
},
"additionalProperties": false,
"required": [
"vector_db_id",
"vector_store_id",
"query"
],
"title": "QueryChunksRequest"
@ -15452,7 +16149,6 @@
},
"max_tokens": {
"type": "integer",
"default": 0,
"description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
},
"repetition_penalty": {
@ -15735,7 +16431,7 @@
"const": "memory_retrieval",
"default": "memory_retrieval"
},
"vector_db_ids": {
"vector_store_ids": {
"type": "string",
"description": "The IDs of the vector databases to retrieve context from."
},
@ -15749,7 +16445,7 @@
"turn_id",
"step_id",
"step_type",
"vector_db_ids",
"vector_store_ids",
"inserted_context"
],
"title": "MemoryRetrievalStep",
@ -17897,6 +18593,11 @@
"description": "APIs for creating and interacting with agentic systems.",
"x-displayName": "Agents"
},
{
"name": "Batches",
"description": "The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale.\n\nThe API is designed to allow use of OpenAI client libraries for seamless integration.\n\nThis API provides the following extensions:\n - idempotent batch creation\n\nNote: This API is currently under active development and may undergo changes.",
"x-displayName": "Batches"
},
{
"name": "Benchmarks",
"description": ""
@ -17991,6 +18692,7 @@
"name": "Operations",
"tags": [
"Agents",
"Batches",
"Benchmarks",
"Conversations",
"DatasetIO",

View file

@ -15,6 +15,141 @@ info:
servers:
- url: http://any-hosted-llama-stack.com
paths:
/v1/batches:
get:
responses:
'200':
description: A list of batch objects.
content:
application/json:
schema:
$ref: '#/components/schemas/ListBatchesResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: List all batches for the current user.
description: List all batches for the current user.
parameters:
- name: after
in: query
description: >-
A cursor for pagination; returns batches after this batch ID.
required: false
schema:
type: string
- name: limit
in: query
description: >-
Number of batches to return (default 20, max 100).
required: false
schema:
type: integer
deprecated: false
post:
responses:
'200':
description: The created batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Create a new batch for processing multiple API requests.
description: >-
Create a new batch for processing multiple API requests.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CreateBatchRequest'
required: true
deprecated: false
/v1/batches/{batch_id}:
get:
responses:
'200':
description: The batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Retrieve information about a specific batch.
description: >-
Retrieve information about a specific batch.
parameters:
- name: batch_id
in: path
description: The ID of the batch to retrieve.
required: true
schema:
type: string
deprecated: false
/v1/batches/{batch_id}/cancel:
post:
responses:
'200':
description: The updated batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: Cancel a batch that is in progress.
description: Cancel a batch that is in progress.
parameters:
- name: batch_id
in: path
description: The ID of the batch to cancel.
required: true
schema:
type: string
deprecated: false
/v1/chat/completions:
get:
responses:
@ -4212,6 +4347,331 @@ components:
title: Error
description: >-
Error response from the API. Roughly follows RFC 7807.
ListBatchesResponse:
type: object
properties:
object:
type: string
const: list
default: list
data:
type: array
items:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
first_id:
type: string
last_id:
type: string
has_more:
type: boolean
default: false
additionalProperties: false
required:
- object
- data
- has_more
title: ListBatchesResponse
description: >-
Response containing a list of batch objects.
CreateBatchRequest:
type: object
properties:
input_file_id:
type: string
description: >-
The ID of an uploaded file containing requests for the batch.
endpoint:
type: string
description: >-
The endpoint to be used for all requests in the batch.
completion_window:
type: string
const: 24h
description: >-
The time window within which the batch should be processed.
metadata:
type: object
additionalProperties:
type: string
description: Optional metadata for the batch.
idempotency_key:
type: string
description: >-
Optional idempotency key. When provided, enables idempotent behavior.
additionalProperties: false
required:
- input_file_id
- endpoint
- completion_window
title: CreateBatchRequest
Batch:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
Order:
type: string
enum:
@ -5474,11 +5934,44 @@ components:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
- $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage'
- $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile'
discriminator:
propertyName: type
mapping:
input_text: '#/components/schemas/OpenAIResponseInputMessageContentText'
input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage'
input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile'
OpenAIResponseInputMessageContentFile:
type: object
properties:
type:
type: string
const: input_file
default: input_file
description: >-
The type of the input item. Always `input_file`.
file_data:
type: string
description: >-
The data of the file to be sent to the model.
file_id:
type: string
description: >-
(Optional) The ID of the file to be sent to the model.
file_url:
type: string
description: >-
The URL of the file to be sent to the model.
filename:
type: string
description: >-
The name of the file to be sent to the model.
additionalProperties: false
required:
- type
title: OpenAIResponseInputMessageContentFile
description: >-
File content for input messages in OpenAI response format.
OpenAIResponseInputMessageContentImage:
type: object
properties:
@ -5499,6 +5992,10 @@ components:
default: input_image
description: >-
Content type identifier, always "input_image"
file_id:
type: string
description: >-
(Optional) The ID of the file to be sent to the model.
image_url:
type: string
description: (Optional) URL of the image content
@ -6735,14 +7232,9 @@ components:
Error details for failed OpenAI response requests.
OpenAIResponseInput:
oneOf:
- $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
- $ref: '#/components/schemas/OpenAIResponseOutput'
- $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
- $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
- $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
- $ref: '#/components/schemas/OpenAIResponseMessage'
OpenAIResponseInputToolFileSearch:
type: object
@ -6898,6 +7390,10 @@ components:
type: string
description: >-
(Optional) ID of the previous response in a conversation
prompt:
$ref: '#/components/schemas/OpenAIResponsePrompt'
description: >-
(Optional) Reference to a prompt template and its variables.
status:
type: string
description: >-
@ -6971,6 +7467,30 @@ components:
mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
OpenAIResponsePrompt:
type: object
properties:
id:
type: string
description: Unique identifier of the prompt template
variables:
type: object
additionalProperties:
$ref: '#/components/schemas/OpenAIResponseInputMessageContent'
description: >-
          Dictionary mapping variable names to OpenAIResponseInputMessageContent
          structures for template substitution. The substitution values can be either
          strings or other Response input types such as images or files.
version:
type: string
description: >-
Version number of the prompt to use (defaults to latest if not specified)
additionalProperties: false
required:
- id
title: OpenAIResponsePrompt
description: >-
OpenAI compatible Prompt object that is used in OpenAI responses.
OpenAIResponseText:
type: object
properties:
@ -7228,6 +7748,10 @@ components:
model:
type: string
description: The underlying LLM used for completions.
prompt:
$ref: '#/components/schemas/OpenAIResponsePrompt'
description: >-
(Optional) Prompt object with ID, version, and variables.
instructions:
type: string
previous_response_id:
@ -7305,6 +7829,10 @@ components:
type: string
description: >-
(Optional) ID of the previous response in a conversation
prompt:
$ref: '#/components/schemas/OpenAIResponsePrompt'
description: >-
(Optional) Reference to a prompt template and its variables.
status:
type: string
description: >-
@ -9867,7 +10395,7 @@ components:
$ref: '#/components/schemas/RAGDocument'
description: >-
List of documents to index in the RAG system
vector_db_id:
vector_store_id:
type: string
description: >-
ID of the vector database to store the document embeddings
@ -9878,7 +10406,7 @@ components:
additionalProperties: false
required:
- documents
- vector_db_id
- vector_store_id
- chunk_size_in_tokens
title: InsertRequest
DefaultRAGQueryGeneratorConfig:
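Since `vector_db_id` and `vector_db_ids` become `vector_store_id` and `vector_store_ids` across these request schemas, existing RAG-tool callers need a one-line rename. A minimal sketch against the Python client, assuming `llama_stack_client` mirrors the renamed fields and a server on the default port:

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

# Indexing: the keyword argument was previously vector_db_id.
client.tool_runtime.rag_tool.insert(
    documents=[{"document_id": "doc-1", "content": "hello world", "metadata": {}}],
    vector_store_id="my-store",
    chunk_size_in_tokens=512,
)

# Querying: the keyword argument was previously vector_db_ids.
result = client.tool_runtime.rag_tool.query(
    content="hello",
    vector_store_ids=["my-store"],
)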
@ -10049,7 +10577,7 @@ components:
$ref: '#/components/schemas/InterleavedContent'
description: >-
The query content to search for in the indexed documents
vector_db_ids:
vector_store_ids:
type: array
items:
type: string
@ -10062,7 +10590,7 @@ components:
additionalProperties: false
required:
- content
- vector_db_ids
- vector_store_ids
title: QueryRequest
RAGQueryResult:
type: object
@ -10190,6 +10718,10 @@ components:
description: >-
The content of the chunk, which can be interleaved text, images, or other
types.
chunk_id:
type: string
description: >-
Unique identifier for the chunk. Must be provided explicitly.
metadata:
type: object
additionalProperties:
@ -10210,10 +10742,6 @@ components:
description: >-
Optional embedding for the chunk. If not provided, it will be computed
later.
stored_chunk_id:
type: string
description: >-
The chunk ID that is stored in the vector database. Used for backend functionality.
chunk_metadata:
$ref: '#/components/schemas/ChunkMetadata'
description: >-
@ -10222,6 +10750,7 @@ components:
additionalProperties: false
required:
- content
- chunk_id
- metadata
title: Chunk
description: >-
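With `chunk_id` now required on `Chunk` (and the backend-managed `stored_chunk_id` removed), callers must mint their own identifiers. A hedged sketch of a conforming payload; the `vector_io.insert` call shape is assumed rather than taken from this diff:

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

chunk = {
    "chunk_id": "doc-1-chunk-0",  # now required; no longer assigned by the backend
    "content": "Llama Stack supports batch processing.",
    "metadata": {"document_id": "doc-1"},
}
client.vector_io.insert(vector_store_id="my-store", chunks=[chunk])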
@ -10286,7 +10815,7 @@ components:
InsertChunksRequest:
type: object
properties:
vector_db_id:
vector_store_id:
type: string
description: >-
The identifier of the vector database to insert the chunks into.
@ -10305,13 +10834,13 @@ components:
description: The time to live of the chunks.
additionalProperties: false
required:
- vector_db_id
- vector_store_id
- chunks
title: InsertChunksRequest
QueryChunksRequest:
type: object
properties:
vector_db_id:
vector_store_id:
type: string
description: >-
The identifier of the vector database to query.
@ -10331,7 +10860,7 @@ components:
description: The parameters of the query.
additionalProperties: false
required:
- vector_db_id
- vector_store_id
- query
title: QueryChunksRequest
QueryChunksResponse:
@ -11600,7 +12129,6 @@ components:
description: The sampling strategy.
max_tokens:
type: integer
default: 0
description: >-
The maximum number of tokens that can be generated in the completion.
The token count of your prompt plus max_tokens cannot exceed the model's
@ -11850,7 +12378,7 @@ components:
description: Type of the step in an agent turn.
const: memory_retrieval
default: memory_retrieval
vector_db_ids:
vector_store_ids:
type: string
description: >-
The IDs of the vector databases to retrieve context from.
@ -11863,7 +12391,7 @@ components:
- turn_id
- step_id
- step_type
- vector_db_ids
- vector_store_ids
- inserted_context
title: MemoryRetrievalStep
description: >-
@ -13460,6 +13988,19 @@ tags:
description: >-
APIs for creating and interacting with agentic systems.
x-displayName: Agents
  - name: Batches
    description: >-
      The Batches API enables efficient processing of multiple requests in a single
      operation, particularly useful for processing large datasets, batch evaluation
      workflows, and cost-effective inference at scale.
      The API is designed to allow use of the openai client libraries for seamless
      integration. It provides the following extensions:
      - idempotent batch creation
      Note: This API is currently under active development and may undergo changes.
    x-displayName: Batches
- name: Benchmarks
description: ''
- name: Conversations
@ -13534,6 +14075,7 @@ x-tagGroups:
- name: Operations
tags:
- Agents
- Batches
- Benchmarks
- Conversations
- DatasetIO
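Because the Batches surface is OpenAI-compatible, the stock openai client can drive it directly. A minimal sketch, assuming a Llama Stack server on the default port 8321; the idempotency_key extension has no typed parameter in the upstream client, so it is passed through extra_body (an assumption based on the CreateBatchRequest schema above):

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

# Batch input is a JSONL file with one request per line.
batch_input = client.files.create(file=open("requests.jsonl", "rb"), purpose="batch")

# Retrying with the same idempotency key should return the existing batch
# instead of creating a duplicate.
batch = client.batches.create(
    input_file_id=batch_input.id,
    endpoint="/v1/chat/completions",
    completion_window="24h",
    metadata={"job": "nightly-eval"},
    extra_body={"idempotency_key": "nightly-eval-2025-10-31"},
)
print(batch.id, batch.status)  # e.g. "validating", per the status enum above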

View file

@ -1,7 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from .telemetry import *

View file

@ -1,250 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import os
import threading
from typing import Any
from opentelemetry import metrics, trace
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator
from llama_stack.apis.telemetry import (
Event,
MetricEvent,
SpanEndPayload,
SpanStartPayload,
SpanStatus,
StructuredLogEvent,
UnstructuredLogEvent,
)
from llama_stack.apis.telemetry import (
Telemetry as TelemetryBase,
)
from llama_stack.core.telemetry.tracing import ROOT_SPAN_MARKERS
from llama_stack.log import get_logger
_GLOBAL_STORAGE: dict[str, dict[str | int, Any]] = {
"active_spans": {},
"counters": {},
"gauges": {},
"up_down_counters": {},
}
_global_lock = threading.Lock()
_TRACER_PROVIDER = None
logger = get_logger(name=__name__, category="telemetry")
def is_tracing_enabled(tracer):
with tracer.start_as_current_span("check_tracing") as span:
return span.is_recording()
class Telemetry(TelemetryBase):
def __init__(self) -> None:
self.meter = None
global _TRACER_PROVIDER
# Initialize the correct span processor based on the provider state.
# This is needed since once the span processor is set, it cannot be unset.
# Recreating the telemetry adapter multiple times will result in duplicate span processors.
# Since the library client can be recreated multiple times in a notebook,
# the kernel will hold on to the span processor and cause duplicate spans to be written.
if os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT"):
if _TRACER_PROVIDER is None:
provider = TracerProvider()
trace.set_tracer_provider(provider)
_TRACER_PROVIDER = provider
# Use single OTLP endpoint for all telemetry signals
# Let OpenTelemetry SDK handle endpoint construction automatically
# The SDK will read OTEL_EXPORTER_OTLP_ENDPOINT and construct appropriate URLs
# https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter
span_exporter = OTLPSpanExporter()
span_processor = BatchSpanProcessor(span_exporter)
trace.get_tracer_provider().add_span_processor(span_processor)
metric_reader = PeriodicExportingMetricReader(OTLPMetricExporter())
metric_provider = MeterProvider(metric_readers=[metric_reader])
metrics.set_meter_provider(metric_provider)
self.is_otel_endpoint_set = True
else:
logger.warning("OTEL_EXPORTER_OTLP_ENDPOINT is not set, skipping telemetry")
self.is_otel_endpoint_set = False
self.meter = metrics.get_meter(__name__)
self._lock = _global_lock
async def initialize(self) -> None:
pass
async def shutdown(self) -> None:
if self.is_otel_endpoint_set:
trace.get_tracer_provider().force_flush()
async def log_event(self, event: Event, ttl_seconds: int = 604800) -> None:
if isinstance(event, UnstructuredLogEvent):
self._log_unstructured(event, ttl_seconds)
elif isinstance(event, MetricEvent):
self._log_metric(event)
elif isinstance(event, StructuredLogEvent):
self._log_structured(event, ttl_seconds)
else:
raise ValueError(f"Unknown event type: {event}")
def _log_unstructured(self, event: UnstructuredLogEvent, ttl_seconds: int) -> None:
with self._lock:
# Use global storage instead of instance storage
span_id = int(event.span_id, 16)
span = _GLOBAL_STORAGE["active_spans"].get(span_id)
if span:
timestamp_ns = int(event.timestamp.timestamp() * 1e9)
span.add_event(
name=event.type.value,
attributes={
"message": event.message,
"severity": event.severity.value,
"__ttl__": ttl_seconds,
**(event.attributes or {}),
},
timestamp=timestamp_ns,
)
else:
print(f"Warning: No active span found for span_id {span_id}. Dropping event: {event}")
def _get_or_create_counter(self, name: str, unit: str) -> metrics.Counter:
assert self.meter is not None
if name not in _GLOBAL_STORAGE["counters"]:
_GLOBAL_STORAGE["counters"][name] = self.meter.create_counter(
name=name,
unit=unit,
description=f"Counter for {name}",
)
return _GLOBAL_STORAGE["counters"][name]
def _get_or_create_gauge(self, name: str, unit: str) -> metrics.ObservableGauge:
assert self.meter is not None
if name not in _GLOBAL_STORAGE["gauges"]:
_GLOBAL_STORAGE["gauges"][name] = self.meter.create_gauge(
name=name,
unit=unit,
description=f"Gauge for {name}",
)
return _GLOBAL_STORAGE["gauges"][name]
def _log_metric(self, event: MetricEvent) -> None:
# Add metric as an event to the current span
try:
with self._lock:
# Only try to add to span if we have a valid span_id
if event.span_id:
try:
span_id = int(event.span_id, 16)
span = _GLOBAL_STORAGE["active_spans"].get(span_id)
if span:
timestamp_ns = int(event.timestamp.timestamp() * 1e9)
span.add_event(
name=f"metric.{event.metric}",
attributes={
"value": event.value,
"unit": event.unit,
**(event.attributes or {}),
},
timestamp=timestamp_ns,
)
except (ValueError, KeyError):
                        # Invalid span_id or span not found; the metric is still recorded by the meter below
pass
except Exception:
# Lock acquisition failed
logger.debug("Failed to acquire lock to add metric to span")
# Log to OpenTelemetry meter if available
if self.meter is None:
return
if isinstance(event.value, int):
counter = self._get_or_create_counter(event.metric, event.unit)
counter.add(event.value, attributes=event.attributes)
elif isinstance(event.value, float):
up_down_counter = self._get_or_create_up_down_counter(event.metric, event.unit)
up_down_counter.add(event.value, attributes=event.attributes)
def _get_or_create_up_down_counter(self, name: str, unit: str) -> metrics.UpDownCounter:
assert self.meter is not None
if name not in _GLOBAL_STORAGE["up_down_counters"]:
_GLOBAL_STORAGE["up_down_counters"][name] = self.meter.create_up_down_counter(
name=name,
unit=unit,
description=f"UpDownCounter for {name}",
)
return _GLOBAL_STORAGE["up_down_counters"][name]
def _log_structured(self, event: StructuredLogEvent, ttl_seconds: int) -> None:
with self._lock:
span_id = int(event.span_id, 16)
tracer = trace.get_tracer(__name__)
if event.attributes is None:
event.attributes = {}
event.attributes["__ttl__"] = ttl_seconds
# Extract these W3C trace context attributes so they are not written to
# underlying storage, as we just need them to propagate the trace context.
traceparent = event.attributes.pop("traceparent", None)
tracestate = event.attributes.pop("tracestate", None)
if traceparent:
# If we have a traceparent header value, we're not the root span.
for root_attribute in ROOT_SPAN_MARKERS:
event.attributes.pop(root_attribute, None)
if isinstance(event.payload, SpanStartPayload):
# Check if span already exists to prevent duplicates
if span_id in _GLOBAL_STORAGE["active_spans"]:
return
context = None
if event.payload.parent_span_id:
parent_span_id = int(event.payload.parent_span_id, 16)
parent_span = _GLOBAL_STORAGE["active_spans"].get(parent_span_id)
context = trace.set_span_in_context(parent_span)
elif traceparent:
carrier = {
"traceparent": traceparent,
"tracestate": tracestate,
}
context = TraceContextTextMapPropagator().extract(carrier=carrier)
span = tracer.start_span(
name=event.payload.name,
context=context,
attributes=event.attributes or {},
)
_GLOBAL_STORAGE["active_spans"][span_id] = span
elif isinstance(event.payload, SpanEndPayload):
span = _GLOBAL_STORAGE["active_spans"].get(span_id)
if span:
if event.attributes:
span.set_attributes(event.attributes)
status = (
trace.Status(status_code=trace.StatusCode.OK)
if event.payload.status == SpanStatus.OK
else trace.Status(status_code=trace.StatusCode.ERROR)
)
span.set_status(status)
span.end()
_GLOBAL_STORAGE["active_spans"].pop(span_id, None)
else:
raise ValueError(f"Unknown structured log event: {event}")

View file

@ -1,40 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from collections.abc import AsyncGenerator
from contextvars import ContextVar
def preserve_contexts_async_generator[T](
gen: AsyncGenerator[T, None], context_vars: list[ContextVar]
) -> AsyncGenerator[T, None]:
"""
Wraps an async generator to preserve context variables across iterations.
This is needed because we start a new asyncio event loop for each streaming request,
and we need to preserve the context across the event loop boundary.
"""
# Capture initial context values
initial_context_values = {context_var.name: context_var.get() for context_var in context_vars}
async def wrapper() -> AsyncGenerator[T, None]:
while True:
try:
# Restore context values before any await
for context_var in context_vars:
context_var.set(initial_context_values[context_var.name])
item = await gen.__anext__()
# Update our tracked values with any changes made during this iteration
for context_var in context_vars:
initial_context_values[context_var.name] = context_var.get()
yield item
except StopAsyncIteration:
break
return wrapper()
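For context on the helper above (moved rather than deleted in this commit's src/ re-layout), a minimal usage sketch; the import path is an assumption:

import asyncio
from contextvars import ContextVar

from llama_stack.core.utils.context import preserve_contexts_async_generator  # assumed path

request_id: ContextVar[str] = ContextVar("request_id", default="-")

async def stream_tokens():
    for token in ("hello", "world"):
        # Without the wrapper, a fresh event loop may not see request_id.
        yield f"[{request_id.get()}] {token}"

async def main():
    request_id.set("req-42")
    wrapped = preserve_contexts_async_generator(stream_tokens(), [request_id])
    async for item in wrapped:
        print(item)  # [req-42] hello, then [req-42] world

asyncio.run(main())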

View file

@ -1,61 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
from . import NVIDIAConfig
from .utils import _is_nvidia_hosted
logger = get_logger(name=__name__, category="inference::nvidia")
class NVIDIAInferenceAdapter(OpenAIMixin):
config: NVIDIAConfig
"""
NVIDIA Inference Adapter for Llama Stack.
"""
# source: https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/support-matrix.html
embedding_model_metadata: dict[str, dict[str, int]] = {
"nvidia/llama-3.2-nv-embedqa-1b-v2": {"embedding_dimension": 2048, "context_length": 8192},
"nvidia/nv-embedqa-e5-v5": {"embedding_dimension": 512, "context_length": 1024},
"nvidia/nv-embedqa-mistral-7b-v2": {"embedding_dimension": 512, "context_length": 4096},
"snowflake/arctic-embed-l": {"embedding_dimension": 512, "context_length": 1024},
}
async def initialize(self) -> None:
logger.info(f"Initializing NVIDIAInferenceAdapter({self.config.url})...")
if _is_nvidia_hosted(self.config):
if not self.config.auth_credential:
raise RuntimeError(
"API key is required for hosted NVIDIA NIM. Either provide an API key or use a self-hosted NIM."
)
def get_api_key(self) -> str:
"""
Get the API key for OpenAI mixin.
:return: The NVIDIA API key
"""
if self.config.auth_credential:
return self.config.auth_credential.get_secret_value()
if not _is_nvidia_hosted(self.config):
return "NO KEY REQUIRED"
return None
def get_base_url(self) -> str:
"""
Get the base URL for OpenAI mixin.
:return: The NVIDIA API base URL
"""
return f"{self.config.url}/v1" if self.config.append_api_version else self.config.url

View file

@ -31,7 +31,7 @@ dependencies = [
"jinja2>=3.1.6",
"jsonschema",
"llama-stack-client>=0.3.0",
"openai>=1.107", # for expires_after support
"openai>=2.5.0",
"prompt-toolkit",
"python-dotenv",
"pyjwt[crypto]>=2.10.0", # Pull crypto to support RS256 for jwt. Requires 2.10.0+ for ssl_context support.
@ -67,17 +67,48 @@ dev = [
"pytest-cov",
"pytest-html",
"pytest-json-report",
"pytest-socket", # For blocking network access in unit tests
"nbval", # For notebook testing
"pytest-socket", # For blocking network access in unit tests
"nbval", # For notebook testing
"black",
"ruff",
"mypy",
"pre-commit",
"ruamel.yaml", # needed for openapi generator
]
# Type checking dependencies - includes type stubs and optional runtime dependencies
# needed for complete mypy coverage across all optional features
type_checking = [
"types-requests",
"types-setuptools",
"pre-commit",
"ruamel.yaml", # needed for openapi generator
"types-jsonschema",
"pandas-stubs",
"types-psutil",
"types-tqdm",
"boto3-stubs[s3]",
"streamlit",
"streamlit-option-menu",
"pandas",
"anthropic",
"databricks-sdk",
"fairscale",
"torchtune",
"trl",
"peft",
"datasets",
"together",
"nest-asyncio",
"pymongo",
"torchvision",
"sqlite-vec",
"faiss-cpu",
"lm-format-enforcer",
"mcp",
"ollama",
]
# These are the dependencies required for running unit tests.
unit = [
"anthropic",
"databricks-sdk",
"sqlite-vec",
"ollama",
"aiosqlite",
@ -151,7 +182,7 @@ llama = "llama_stack.cli.llama:main"
install-wheel-from-presigned = "llama_stack.cli.scripts.run:install_wheel_from_presigned"
[tool.setuptools.packages.find]
where = ["."]
where = ["src"]
include = ["llama_stack", "llama_stack.*"]
[[tool.uv.index]]
@ -218,17 +249,17 @@ unfixable = [
# Ignore the following errors for the following files
[tool.ruff.lint.per-file-ignores]
"tests/**/*.py" = ["DTZ"] # Ignore datetime rules for tests
"llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py" = ["RUF001"]
"llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py" = [
"src/llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py" = ["RUF001"]
"src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py" = [
"RUF001",
"PLE2515",
]
"llama_stack/apis/**/__init__.py" = [
"src/llama_stack/apis/**/__init__.py" = [
"F403",
] # Using import * is acceptable (or at least tolerated) in an __init__.py of a package API
[tool.mypy]
mypy_path = ["llama_stack"]
mypy_path = ["src"]
packages = ["llama_stack"]
plugins = ['pydantic.mypy']
disable_error_code = []
@ -240,82 +271,91 @@ follow_imports = "silent"
# to exclude the entire directory.
exclude = [
# As we fix more and more of these, we should remove them from the list
"^llama_stack.core/build\\.py$",
"^llama_stack.core/client\\.py$",
"^llama_stack.core/request_headers\\.py$",
"^llama_stack.core/routers/",
"^llama_stack.core/routing_tables/",
"^llama_stack.core/server/endpoints\\.py$",
"^llama_stack.core/server/server\\.py$",
"^llama_stack.core/stack\\.py$",
"^llama_stack.core/store/registry\\.py$",
"^llama_stack.core/utils/exec\\.py$",
"^llama_stack.core/utils/prompt_for_config\\.py$",
"^llama_stack/models/llama/llama3/interface\\.py$",
"^llama_stack/models/llama/llama3/tokenizer\\.py$",
"^llama_stack/models/llama/llama3/tool_utils\\.py$",
"^llama_stack/providers/inline/agents/meta_reference/",
"^llama_stack/providers/inline/datasetio/localfs/",
"^llama_stack/providers/inline/eval/meta_reference/eval\\.py$",
"^llama_stack/providers/inline/inference/meta_reference/inference\\.py$",
"^llama_stack/models/llama/llama3/generation\\.py$",
"^llama_stack/models/llama/llama3/multimodal/model\\.py$",
"^llama_stack/models/llama/llama4/",
"^llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers\\.py$",
"^llama_stack/providers/inline/post_training/common/validator\\.py$",
"^llama_stack/providers/inline/safety/code_scanner/",
"^llama_stack/providers/inline/safety/llama_guard/",
"^llama_stack/providers/inline/scoring/basic/",
"^llama_stack/providers/inline/scoring/braintrust/",
"^llama_stack/providers/inline/scoring/llm_as_judge/",
"^llama_stack/providers/remote/agents/sample/",
"^llama_stack/providers/remote/datasetio/huggingface/",
"^llama_stack/providers/remote/datasetio/nvidia/",
"^llama_stack/providers/remote/inference/bedrock/",
"^llama_stack/providers/remote/inference/nvidia/",
"^llama_stack/providers/remote/inference/passthrough/",
"^llama_stack/providers/remote/inference/runpod/",
"^llama_stack/providers/remote/inference/tgi/",
"^llama_stack/providers/remote/inference/watsonx/",
"^llama_stack/providers/remote/safety/bedrock/",
"^llama_stack/providers/remote/safety/nvidia/",
"^llama_stack/providers/remote/safety/sambanova/",
"^llama_stack/providers/remote/safety/sample/",
"^llama_stack/providers/remote/tool_runtime/bing_search/",
"^llama_stack/providers/remote/tool_runtime/brave_search/",
"^llama_stack/providers/remote/tool_runtime/model_context_protocol/",
"^llama_stack/providers/remote/tool_runtime/tavily_search/",
"^llama_stack/providers/remote/tool_runtime/wolfram_alpha/",
"^llama_stack/providers/remote/post_training/nvidia/",
"^llama_stack/providers/remote/vector_io/chroma/",
"^llama_stack/providers/remote/vector_io/milvus/",
"^llama_stack/providers/remote/vector_io/pgvector/",
"^llama_stack/providers/remote/vector_io/qdrant/",
"^llama_stack/providers/remote/vector_io/sample/",
"^llama_stack/providers/remote/vector_io/weaviate/",
"^llama_stack/providers/utils/bedrock/client\\.py$",
"^llama_stack/providers/utils/bedrock/refreshable_boto_session\\.py$",
"^llama_stack/providers/utils/inference/embedding_mixin\\.py$",
"^llama_stack/providers/utils/inference/litellm_openai_mixin\\.py$",
"^llama_stack/providers/utils/inference/model_registry\\.py$",
"^llama_stack/providers/utils/inference/openai_compat\\.py$",
"^llama_stack/providers/utils/inference/prompt_adapter\\.py$",
"^llama_stack/providers/utils/kvstore/kvstore\\.py$",
"^llama_stack/providers/utils/kvstore/postgres/postgres\\.py$",
"^llama_stack/providers/utils/kvstore/redis/redis\\.py$",
"^llama_stack/providers/utils/memory/vector_store\\.py$",
"^llama_stack/providers/utils/scoring/aggregation_utils\\.py$",
"^llama_stack/providers/utils/scoring/base_scoring_fn\\.py$",
"^llama_stack/providers/utils/telemetry/dataset_mixin\\.py$",
"^llama_stack/providers/utils/telemetry/trace_protocol\\.py$",
"^llama_stack/providers/utils/telemetry/tracing\\.py$",
"^llama_stack/strong_typing/auxiliary\\.py$",
"^llama_stack/distributions/template\\.py$",
"^src/llama_stack/core/build\\.py$",
"^src/llama_stack/core/client\\.py$",
"^src/llama_stack/core/request_headers\\.py$",
"^src/llama_stack/core/routers/",
"^src/llama_stack/core/routing_tables/",
"^src/llama_stack/core/server/endpoints\\.py$",
"^src/llama_stack/core/server/server\\.py$",
"^src/llama_stack/core/stack\\.py$",
"^src/llama_stack/core/store/registry\\.py$",
"^src/llama_stack/core/utils/exec\\.py$",
"^src/llama_stack/core/utils/prompt_for_config\\.py$",
"^src/llama_stack/models/llama/llama3/interface\\.py$",
"^src/llama_stack/models/llama/llama3/tokenizer\\.py$",
"^src/llama_stack/models/llama/llama3/tool_utils\\.py$",
"^src/llama_stack/providers/inline/datasetio/localfs/",
"^src/llama_stack/providers/inline/eval/meta_reference/eval\\.py$",
"^src/llama_stack/providers/inline/inference/meta_reference/inference\\.py$",
"^src/llama_stack/models/llama/llama3/generation\\.py$",
"^src/llama_stack/models/llama/llama3/multimodal/model\\.py$",
"^src/llama_stack/models/llama/llama4/",
"^src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers\\.py$",
"^src/llama_stack/providers/inline/post_training/common/validator\\.py$",
"^src/llama_stack/providers/inline/safety/code_scanner/",
"^src/llama_stack/providers/inline/safety/llama_guard/",
"^src/llama_stack/providers/inline/scoring/basic/",
"^src/llama_stack/providers/inline/scoring/braintrust/",
"^src/llama_stack/providers/inline/scoring/llm_as_judge/",
"^src/llama_stack/providers/remote/agents/sample/",
"^src/llama_stack/providers/remote/datasetio/huggingface/",
"^src/llama_stack/providers/remote/datasetio/nvidia/",
"^src/llama_stack/providers/remote/inference/bedrock/",
"^src/llama_stack/providers/remote/inference/nvidia/",
"^src/llama_stack/providers/remote/inference/passthrough/",
"^src/llama_stack/providers/remote/inference/runpod/",
"^src/llama_stack/providers/remote/inference/tgi/",
"^src/llama_stack/providers/remote/inference/watsonx/",
"^src/llama_stack/providers/remote/safety/bedrock/",
"^src/llama_stack/providers/remote/safety/nvidia/",
"^src/llama_stack/providers/remote/safety/sambanova/",
"^src/llama_stack/providers/remote/safety/sample/",
"^src/llama_stack/providers/remote/tool_runtime/bing_search/",
"^src/llama_stack/providers/remote/tool_runtime/brave_search/",
"^src/llama_stack/providers/remote/tool_runtime/model_context_protocol/",
"^src/llama_stack/providers/remote/tool_runtime/tavily_search/",
"^src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/",
"^src/llama_stack/providers/remote/post_training/nvidia/",
"^src/llama_stack/providers/remote/vector_io/chroma/",
"^src/llama_stack/providers/remote/vector_io/milvus/",
"^src/llama_stack/providers/remote/vector_io/pgvector/",
"^src/llama_stack/providers/remote/vector_io/qdrant/",
"^src/llama_stack/providers/remote/vector_io/sample/",
"^src/llama_stack/providers/remote/vector_io/weaviate/",
"^src/llama_stack/providers/utils/bedrock/client\\.py$",
"^src/llama_stack/providers/utils/bedrock/refreshable_boto_session\\.py$",
"^src/llama_stack/providers/utils/inference/embedding_mixin\\.py$",
"^src/llama_stack/providers/utils/inference/litellm_openai_mixin\\.py$",
"^src/llama_stack/providers/utils/inference/model_registry\\.py$",
"^src/llama_stack/providers/utils/inference/openai_compat\\.py$",
"^src/llama_stack/providers/utils/inference/prompt_adapter\\.py$",
"^src/llama_stack/providers/utils/kvstore/kvstore\\.py$",
"^src/llama_stack/providers/utils/kvstore/postgres/postgres\\.py$",
"^src/llama_stack/providers/utils/kvstore/redis/redis\\.py$",
"^src/llama_stack/providers/utils/memory/vector_store\\.py$",
"^src/llama_stack/providers/utils/scoring/aggregation_utils\\.py$",
"^src/llama_stack/providers/utils/scoring/base_scoring_fn\\.py$",
"^src/llama_stack/providers/utils/telemetry/dataset_mixin\\.py$",
"^src/llama_stack/providers/utils/telemetry/trace_protocol\\.py$",
"^src/llama_stack/providers/utils/telemetry/tracing\\.py$",
"^src/llama_stack/strong_typing/auxiliary\\.py$",
"^src/llama_stack/distributions/template\\.py$",
]
[[tool.mypy.overrides]]
# packages that lack typing annotations, do not have stubs, or are unavailable.
module = ["yaml", "fire"]
module = [
"yaml",
"fire",
"torchtune.*",
"fairscale.*",
"torchvision.*",
"datasets",
"nest_asyncio",
"streamlit_option_menu",
"lmformatenforcer.*",
]
ignore_missing_imports = true
[tool.pydantic-mypy]

View file

@ -16,7 +16,7 @@ if (( BASH_VERSINFO[0] < 4 )); then
exit 1
fi
PACKAGE_DIR="${1:-llama_stack}"
PACKAGE_DIR="${1:-src/llama_stack}"
if [ ! -d "$PACKAGE_DIR" ]; then
echo "ERROR: Package directory '$PACKAGE_DIR' does not exist"

View file

@ -55,7 +55,7 @@ def process_distro(distro_dir: Path, progress, change_tracker: ChangedPathTracke
if template_func := getattr(module, "get_distribution_template", None):
distro = template_func()
yaml_output_dir = REPO_ROOT / "llama_stack" / "distributions" / distro.name
yaml_output_dir = REPO_ROOT / "src" / "llama_stack" / "distributions" / distro.name
doc_output_dir = REPO_ROOT / "docs/docs/distributions" / f"{distro.distro_type}_distro"
change_tracker.add_paths(yaml_output_dir, doc_output_dir)
distro.save_distribution(
@ -93,7 +93,7 @@ def pre_import_distros(distro_dirs: list[Path]) -> None:
def main():
distros_dir = REPO_ROOT / "llama_stack" / "distributions"
distros_dir = REPO_ROOT / "src" / "llama_stack" / "distributions"
change_tracker = ChangedPathTracker()
with Progress(

View file

@ -30,8 +30,10 @@ materialize_telemetry_configs() {
local otel_cfg="${dest}/otel-collector-config.yaml"
local prom_cfg="${dest}/prometheus.yml"
local graf_cfg="${dest}/grafana-datasources.yaml"
local graf_dash_cfg="${dest}/grafana-dashboards.yaml"
local dash_json="${dest}/llama-stack-dashboard.json"
for asset in "$otel_cfg" "$prom_cfg" "$graf_cfg"; do
for asset in "$otel_cfg" "$prom_cfg" "$graf_cfg" "$graf_dash_cfg" "$dash_json"; do
if [ -e "$asset" ]; then
die "Telemetry asset ${asset} already exists; refusing to overwrite"
fi
@ -103,6 +105,7 @@ datasources:
type: prometheus
access: proxy
url: http://prometheus:9090
uid: prometheus
isDefault: true
editable: true
@ -112,6 +115,224 @@ datasources:
url: http://jaeger:16686
editable: true
EOF
cat <<'EOF' > "$graf_dash_cfg"
apiVersion: 1
providers:
- name: 'Llama Stack'
orgId: 1
folder: ''
type: file
disableDeletion: false
updateIntervalSeconds: 10
allowUiUpdates: true
options:
path: /etc/grafana/provisioning/dashboards
EOF
# Copy the dashboard JSON inline to avoid line-length issues
cat > "$dash_json" <<'DASHBOARD_JSON'
{
"annotations": {
"list": []
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": null,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"custom": {
"drawStyle": "line",
"lineInterpolation": "linear",
"showPoints": "auto",
"fillOpacity": 10
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [{"color": "green", "value": null}]
}
}
},
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 0},
"id": 1,
"options": {
"legend": {"calcs": [], "displayMode": "table", "placement": "bottom", "showLegend": true},
"tooltip": {"mode": "multi", "sort": "none"}
},
"targets": [
{
"datasource": {"type": "prometheus", "uid": "prometheus"},
"expr": "llama_stack_completion_tokens_total",
"legendFormat": "{{model_id}} ({{provider_id}})",
"refId": "A"
}
],
"title": "Completion Tokens",
"type": "timeseries"
},
{
"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"},
"fieldConfig": {
"defaults": {
"custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10},
"mappings": [],
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}
}
},
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 0},
"id": 2,
"options": {
"legend": {"calcs": [], "displayMode": "table", "placement": "bottom", "showLegend": true},
"tooltip": {"mode": "multi", "sort": "none"}
},
"targets": [
{"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "llama_stack_prompt_tokens_total", "legendFormat": "Prompt - {{model_id}}", "refId": "A"},
{"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "llama_stack_tokens_total", "legendFormat": "Total - {{model_id}}", "refId": "B"}
],
"title": "Prompt & Total Tokens",
"type": "timeseries"
},
{
"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"},
"fieldConfig": {
"defaults": {
"custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10},
"mappings": [],
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
"unit": "ms"
}
},
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 8},
"id": 3,
"options": {
"legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true},
"tooltip": {"mode": "multi", "sort": "none"}
},
"targets": [
{"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "histogram_quantile(0.95, rate(llama_stack_http_server_duration_milliseconds_bucket[5m]))", "legendFormat": "p95", "refId": "A"},
{"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "histogram_quantile(0.99, rate(llama_stack_http_server_duration_milliseconds_bucket[5m]))", "legendFormat": "p99", "refId": "B"}
],
"title": "HTTP Request Duration (p95, p99)",
"type": "timeseries"
},
{
"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"},
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}
}
},
"gridPos": {"h": 8, "w": 6, "x": 12, "y": 8},
"id": 4,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false},
"textMode": "auto"
},
"targets": [
{"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "sum(llama_stack_http_server_duration_milliseconds_count)", "refId": "A"}
],
"title": "Total Requests",
"type": "stat"
},
{
"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"},
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}
}
},
"gridPos": {"h": 8, "w": 6, "x": 18, "y": 8},
"id": 5,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false},
"textMode": "auto"
},
"targets": [
{"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "sum(llama_stack_http_server_active_requests)", "refId": "A"}
],
"title": "Active Requests",
"type": "stat"
},
{
"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"},
"fieldConfig": {
"defaults": {
"custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10},
"mappings": [],
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
"unit": "reqps"
}
},
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 16},
"id": 6,
"options": {
"legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true},
"tooltip": {"mode": "multi", "sort": "none"}
},
"targets": [
{"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "rate(llama_stack_http_server_duration_milliseconds_count[5m])", "legendFormat": "{{http_target}} - {{http_status_code}}", "refId": "A"}
],
"title": "Request Rate",
"type": "timeseries"
},
{
"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"},
"fieldConfig": {
"defaults": {
"custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10},
"mappings": [],
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
"unit": "Bps"
}
},
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 16},
"id": 7,
"options": {
"legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true},
"tooltip": {"mode": "multi", "sort": "none"}
},
"targets": [
{"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "rate(llama_stack_http_server_request_size_bytes_sum[5m])", "legendFormat": "Request", "refId": "A"},
{"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "rate(llama_stack_http_server_response_size_bytes_sum[5m])", "legendFormat": "Response", "refId": "B"}
],
"title": "Request/Response Sizes",
"type": "timeseries"
}
],
"refresh": "5s",
"schemaVersion": 38,
"tags": ["llama-stack"],
"templating": {"list": []},
"time": {"from": "now-15m", "to": "now"},
"timepicker": {},
"timezone": "browser",
"title": "Llama Stack Metrics",
"uid": "llama-stack-metrics",
"version": 0,
"weekStart": ""
}
DASHBOARD_JSON
}
# Cleanup function to remove temporary files
@ -372,6 +593,8 @@ if [ "$WITH_TELEMETRY" = true ]; then
-e GF_SECURITY_ADMIN_PASSWORD=admin \
-e GF_USERS_ALLOW_SIGN_UP=false \
-v "${TELEMETRY_ASSETS_DIR}/grafana-datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:Z" \
-v "${TELEMETRY_ASSETS_DIR}/grafana-dashboards.yaml:/etc/grafana/provisioning/dashboards/dashboards.yaml:Z" \
-v "${TELEMETRY_ASSETS_DIR}/llama-stack-dashboard.json:/etc/grafana/provisioning/dashboards/llama-stack-dashboard.json:Z" \
docker.io/grafana/grafana:11.0.0 > /dev/null 2>&1; then
die "Grafana startup failed"
fi

View file

@ -208,6 +208,15 @@ if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then
echo "=== Starting Llama Stack Server ==="
export LLAMA_STACK_LOG_WIDTH=120
# Configure telemetry collector for server mode
# Use a fixed port for the OTEL collector so the server can connect to it
COLLECTOR_PORT=4317
export LLAMA_STACK_TEST_COLLECTOR_PORT="${COLLECTOR_PORT}"
export OTEL_EXPORTER_OTLP_ENDPOINT="http://127.0.0.1:${COLLECTOR_PORT}"
export OTEL_EXPORTER_OTLP_PROTOCOL="http/protobuf"
export OTEL_BSP_SCHEDULE_DELAY="200"
export OTEL_BSP_EXPORT_TIMEOUT="2000"
# remove "server:" from STACK_CONFIG
stack_config=$(echo "$STACK_CONFIG" | sed 's/^server://')
nohup llama stack run $stack_config > server.log 2>&1 &
@ -284,10 +293,15 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
docker stop "$container_name" 2>/dev/null || true
docker rm "$container_name" 2>/dev/null || true
# Configure telemetry collector port shared between host and container
COLLECTOR_PORT=4317
export LLAMA_STACK_TEST_COLLECTOR_PORT="${COLLECTOR_PORT}"
# Build environment variables for docker run
DOCKER_ENV_VARS=""
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_INFERENCE_MODE=$INFERENCE_MODE"
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_STACK_CONFIG_TYPE=server"
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:${COLLECTOR_PORT}"
# Pass through API keys if they exist
[ -n "${TOGETHER_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e TOGETHER_API_KEY=$TOGETHER_API_KEY"
@ -308,8 +322,20 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
fi
echo "Using image: $IMAGE_NAME"
docker run -d --network host --name "$container_name" \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
# On macOS/Darwin, --network host doesn't work as expected due to Docker running in a VM
# Use regular port mapping instead
NETWORK_MODE=""
PORT_MAPPINGS=""
if [[ "$(uname)" != "Darwin" ]] && [[ "$(uname)" != *"MINGW"* ]]; then
NETWORK_MODE="--network host"
else
  # On non-Linux hosts (macOS, Windows), explicit port mappings are needed for both the app and the telemetry collector
PORT_MAPPINGS="-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT -p $COLLECTOR_PORT:$COLLECTOR_PORT"
echo "Using bridge networking with port mapping (non-Linux)"
fi
docker run -d $NETWORK_MODE --name "$container_name" \
$PORT_MAPPINGS \
$DOCKER_ENV_VARS \
"$IMAGE_NAME" \
--port $LLAMA_STACK_PORT

View file

@ -6,7 +6,7 @@
# the root directory of this source tree.
set -e
cd llama_stack/ui
cd src/llama_stack/ui
if [ ! -d node_modules ] || [ ! -x node_modules/.bin/prettier ] || [ ! -x node_modules/.bin/eslint ]; then
echo "UI dependencies not installed, skipping prettier/linter check"

View file

@ -0,0 +1,12 @@
apiVersion: 1
providers:
- name: 'Llama Stack'
orgId: 1
folder: ''
type: file
disableDeletion: false
updateIntervalSeconds: 10
allowUiUpdates: true
options:
path: /etc/grafana/provisioning/dashboards

View file

@ -5,6 +5,7 @@ datasources:
type: prometheus
access: proxy
url: http://prometheus:9090
uid: prometheus
isDefault: true
editable: true

View file

@ -0,0 +1,457 @@
{
"annotations": {
"list": []
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": null,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"custom": {
"drawStyle": "line",
"lineInterpolation": "linear",
"showPoints": "auto",
"fillOpacity": 10
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
}
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
},
"id": 1,
"options": {
"legend": {
"calcs": [],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"expr": "llama_stack_completion_tokens_total",
"legendFormat": "{{model_id}} ({{provider_id}})",
"refId": "A"
}
],
"title": "Completion Tokens",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"custom": {
"drawStyle": "line",
"lineInterpolation": "linear",
"showPoints": "auto",
"fillOpacity": 10
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
}
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
},
"id": 2,
"options": {
"legend": {
"calcs": [],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"expr": "llama_stack_prompt_tokens_total",
"legendFormat": "Prompt - {{model_id}}",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"expr": "llama_stack_tokens_total",
"legendFormat": "Total - {{model_id}}",
"refId": "B"
}
],
"title": "Prompt & Total Tokens",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"custom": {
"drawStyle": "line",
"lineInterpolation": "linear",
"showPoints": "auto",
"fillOpacity": 10
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "ms"
}
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 8
},
"id": 3,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"expr": "histogram_quantile(0.95, rate(llama_stack_http_server_duration_milliseconds_bucket[5m]))",
"legendFormat": "p95",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"expr": "histogram_quantile(0.99, rate(llama_stack_http_server_duration_milliseconds_bucket[5m]))",
"legendFormat": "p99",
"refId": "B"
}
],
"title": "HTTP Request Duration (p95, p99)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
}
},
"gridPos": {
"h": 8,
"w": 6,
"x": 12,
"y": 8
},
"id": 4,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"expr": "sum(llama_stack_http_server_duration_milliseconds_count)",
"refId": "A"
}
],
"title": "Total Requests",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
}
},
"gridPos": {
"h": 8,
"w": 6,
"x": 18,
"y": 8
},
"id": 5,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"expr": "sum(llama_stack_http_server_active_requests)",
"refId": "A"
}
],
"title": "Active Requests",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"custom": {
"drawStyle": "line",
"lineInterpolation": "linear",
"showPoints": "auto",
"fillOpacity": 10
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "reqps"
}
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 16
},
"id": 6,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"expr": "rate(llama_stack_http_server_duration_milliseconds_count[5m])",
"legendFormat": "{{http_target}} - {{http_status_code}}",
"refId": "A"
}
],
"title": "Request Rate",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"custom": {
"drawStyle": "line",
"lineInterpolation": "linear",
"showPoints": "auto",
"fillOpacity": 10
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "Bps"
}
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 16
},
"id": 7,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"expr": "rate(llama_stack_http_server_request_size_bytes_sum[5m])",
"legendFormat": "Request",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"expr": "rate(llama_stack_http_server_response_size_bytes_sum[5m])",
"legendFormat": "Response",
"refId": "B"
}
],
"title": "Request/Response Sizes",
"type": "timeseries"
}
],
"refresh": "5s",
"schemaVersion": 38,
"tags": [
"llama-stack"
],
"templating": {
"list": []
},
"time": {
"from": "now-15m",
"to": "now"
},
"timepicker": {},
"timezone": "browser",
"title": "Llama Stack Metrics",
"uid": "llama-stack-metrics",
"version": 0,
"weekStart": ""
}

View file

@ -135,6 +135,8 @@ $CONTAINER_RUNTIME run -d --name grafana \
-e GF_SECURITY_ADMIN_PASSWORD=admin \
-e GF_USERS_ALLOW_SIGN_UP=false \
-v "$SCRIPT_DIR/grafana-datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:Z" \
-v "$SCRIPT_DIR/grafana-dashboards.yaml:/etc/grafana/provisioning/dashboards/dashboards.yaml:Z" \
-v "$SCRIPT_DIR/llama-stack-dashboard.json:/etc/grafana/provisioning/dashboards/llama-stack-dashboard.json:Z" \
docker.io/grafana/grafana:11.0.0
# Wait for services to start

View file

@ -27,4 +27,4 @@ fi
# Run unit tests with coverage
uv run --python "$PYTHON_VERSION" --with-editable . --group unit \
coverage run --source=llama_stack -m pytest -s -v tests/unit/ "$@"
coverage run --source=src/llama_stack -m pytest -s -v tests/unit/ "$@"

View file

@ -38,6 +38,7 @@ from .openai_responses import (
OpenAIResponseInputTool,
OpenAIResponseObject,
OpenAIResponseObjectStream,
OpenAIResponsePrompt,
OpenAIResponseText,
)
@ -149,13 +150,13 @@ class ShieldCallStep(StepCommon):
class MemoryRetrievalStep(StepCommon):
"""A memory retrieval step in an agent turn.
:param vector_db_ids: The IDs of the vector databases to retrieve context from.
:param vector_store_ids: The IDs of the vector databases to retrieve context from.
:param inserted_context: The context retrieved from the vector databases.
"""
step_type: Literal[StepType.memory_retrieval] = StepType.memory_retrieval
# TODO: should this be List[str]?
vector_db_ids: str
vector_store_ids: str
inserted_context: InterleavedContent
@ -810,6 +811,7 @@ class Agents(Protocol):
self,
input: str | list[OpenAIResponseInput],
model: str,
prompt: OpenAIResponsePrompt | None = None,
instructions: str | None = None,
previous_response_id: str | None = None,
conversation: str | None = None,
@ -831,6 +833,7 @@ class Agents(Protocol):
:param input: Input message(s) to create the response.
:param model: The underlying LLM used for completions.
:param prompt: (Optional) Prompt object with ID, version, and variables.
:param previous_response_id: (Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses.
:param conversation: (Optional) The ID of a conversation to add the response to. Must begin with 'conv_'. Input and output messages will be automatically added to the conversation.
:param include: (Optional) Additional fields to include in the response.
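A hedged sketch of exercising the new prompt parameter from the Python client; the argument shape is inferred from the protocol signature above, and all ids are illustrative:

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

response = client.responses.create(
    model="llama3.2:3b",  # illustrative model id
    input="Summarize the attached report.",
    prompt={
        "id": "prompt_weekly_summary",
        "version": "2",
        "variables": {"report": {"type": "input_file", "file_id": "file-abc123"}},
    },
)
print(response.status)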

View file

@ -4,9 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from collections.abc import Sequence
from typing import Annotated, Any, Literal
from pydantic import BaseModel, Field
from pydantic import BaseModel, Field, model_validator
from typing_extensions import TypedDict
from llama_stack.apis.vector_io import SearchRankingOptions as FileSearchRankingOptions
@ -46,23 +47,66 @@ class OpenAIResponseInputMessageContentImage(BaseModel):
:param detail: Level of detail for image processing, can be "low", "high", or "auto"
:param type: Content type identifier, always "input_image"
:param file_id: (Optional) The ID of the file to be sent to the model.
:param image_url: (Optional) URL of the image content
"""
detail: Literal["low"] | Literal["high"] | Literal["auto"] = "auto"
type: Literal["input_image"] = "input_image"
# TODO: handle file_id
file_id: str | None = None
image_url: str | None = None
# TODO: handle file content types
@json_schema_type
class OpenAIResponseInputMessageContentFile(BaseModel):
"""File content for input messages in OpenAI response format.
:param type: The type of the input item. Always `input_file`.
:param file_data: The data of the file to be sent to the model.
:param file_id: (Optional) The ID of the file to be sent to the model.
:param file_url: The URL of the file to be sent to the model.
:param filename: The name of the file to be sent to the model.
"""
type: Literal["input_file"] = "input_file"
file_data: str | None = None
file_id: str | None = None
file_url: str | None = None
filename: str | None = None
@model_validator(mode="after")
def validate_file_source(self) -> "OpenAIResponseInputMessageContentFile":
if not any([self.file_data, self.file_id, self.file_url, self.filename]):
raise ValueError(
"At least one of 'file_data', 'file_id', 'file_url', or 'filename' must be provided for file content"
)
return self
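# A hedged usage sketch for the validator above (values illustrative):
#
#   OpenAIResponseInputMessageContentFile(file_id="file-abc123")  # passes
#   OpenAIResponseInputMessageContentFile()                       # raises pydantic.ValidationError,
#                                                                 # since no file source was given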
OpenAIResponseInputMessageContent = Annotated[
OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage,
OpenAIResponseInputMessageContentText
| OpenAIResponseInputMessageContentImage
| OpenAIResponseInputMessageContentFile,
Field(discriminator="type"),
]
register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent")
@json_schema_type
class OpenAIResponsePrompt(BaseModel):
"""OpenAI compatible Prompt object that is used in OpenAI responses.
:param id: Unique identifier of the prompt template
    :param variables: Dictionary mapping variable names to OpenAIResponseInputMessageContent structures for template substitution. The substitution values can be either strings or other Response input
        types such as images or files.
:param version: Version number of the prompt to use (defaults to latest if not specified)
"""
id: str
variables: dict[str, OpenAIResponseInputMessageContent] | None = None
version: str | None = None
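A sketch of a prompt reference that mixes a text variable with a file variable, assuming OpenAIResponseInputMessageContentText carries its string in a `text` field; the prompt ID, version, and file ID are hypothetical.

prompt = OpenAIResponsePrompt(
    id="pmpl_123",       # hypothetical prompt template ID
    version="2",         # pin a specific version; omit to use the latest
    variables={
        "customer_name": OpenAIResponseInputMessageContentText(text="Alice"),
        "contract": OpenAIResponseInputMessageContentFile(file_id="file-abc123"),
    },
)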
@json_schema_type
class OpenAIResponseAnnotationFileCitation(BaseModel):
"""File citation annotation for referencing specific files in response content.
@ -159,7 +203,7 @@ class OpenAIResponseMessage(BaseModel):
scenarios.
"""
content: str | list[OpenAIResponseInputMessageContent] | list[OpenAIResponseOutputMessageContent]
content: str | Sequence[OpenAIResponseInputMessageContent] | Sequence[OpenAIResponseOutputMessageContent]
role: Literal["system"] | Literal["developer"] | Literal["user"] | Literal["assistant"]
type: Literal["message"] = "message"
@ -211,10 +255,10 @@ class OpenAIResponseOutputMessageFileSearchToolCall(BaseModel):
"""
id: str
queries: list[str]
queries: Sequence[str]
status: str
type: Literal["file_search_call"] = "file_search_call"
results: list[OpenAIResponseOutputMessageFileSearchToolCallResults] | None = None
results: Sequence[OpenAIResponseOutputMessageFileSearchToolCallResults] | None = None
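The list-to-Sequence loosening here (and throughout this file) is a typing change, not a runtime one: Sequence is covariant and read-only, so type checkers accept tuples or lists of subtypes where a plain list annotation would be rejected. A self-contained sketch:

from collections.abc import Sequence

def log_queries(queries: Sequence[str]) -> None:
    for query in queries:
        print(query)

log_queries(["alpha", "beta"])   # a list is a Sequence
log_queries(("alpha", "beta"))   # so is a tuple; a list[str] parameter would reject it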
@json_schema_type
@ -538,6 +582,7 @@ class OpenAIResponseObject(BaseModel):
:param output: List of generated output items (messages, tool calls, etc.)
:param parallel_tool_calls: Whether tool calls can be executed in parallel
:param previous_response_id: (Optional) ID of the previous response in a conversation
:param prompt: (Optional) Reference to a prompt template and its variables.
:param status: Current status of the response generation
:param temperature: (Optional) Sampling temperature used for generation
:param text: Text formatting configuration for the response
@ -553,16 +598,17 @@ class OpenAIResponseObject(BaseModel):
id: str
model: str
object: Literal["response"] = "response"
output: list[OpenAIResponseOutput]
output: Sequence[OpenAIResponseOutput]
parallel_tool_calls: bool = False
previous_response_id: str | None = None
prompt: OpenAIResponsePrompt | None = None
status: str
temperature: float | None = None
# Default to text format to avoid breaking the loading of old responses
# before the field was added. New responses will always have this set.
text: OpenAIResponseText = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text"))
top_p: float | None = None
tools: list[OpenAIResponseTool] | None = None
tools: Sequence[OpenAIResponseTool] | None = None
truncation: str | None = None
usage: OpenAIResponseUsage | None = None
instructions: str | None = None
@ -1254,14 +1300,9 @@ class OpenAIResponseInputFunctionToolCallOutput(BaseModel):
OpenAIResponseInput = Annotated[
# Responses API allows output messages to be passed in as input
OpenAIResponseOutputMessageWebSearchToolCall
| OpenAIResponseOutputMessageFileSearchToolCall
| OpenAIResponseOutputMessageFunctionToolCall
OpenAIResponseOutput
| OpenAIResponseInputFunctionToolCallOutput
| OpenAIResponseMCPApprovalRequest
| OpenAIResponseMCPApprovalResponse
| OpenAIResponseOutputMessageMCPCall
| OpenAIResponseOutputMessageMCPListTools
| OpenAIResponseMessage,
Field(union_mode="left_to_right"),
]
@ -1275,7 +1316,7 @@ class ListOpenAIResponseInputItem(BaseModel):
:param object: Object type identifier, always "list"
"""
data: list[OpenAIResponseInput]
data: Sequence[OpenAIResponseInput]
object: Literal["list"] = "list"
@ -1286,7 +1327,7 @@ class OpenAIResponseObjectWithInput(OpenAIResponseObject):
:param input: List of input items that led to this response
"""
input: list[OpenAIResponseInput]
input: Sequence[OpenAIResponseInput]
def to_response_object(self) -> OpenAIResponseObject:
"""Convert to OpenAIResponseObject by excluding input field."""
@ -1304,7 +1345,7 @@ class ListOpenAIResponseObject(BaseModel):
:param object: Object type identifier, always "list"
"""
data: list[OpenAIResponseObjectWithInput]
data: Sequence[OpenAIResponseObjectWithInput]
has_more: bool
first_id: str
last_id: str

View file

@ -21,8 +21,8 @@ from typing_extensions import TypedDict
from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent
from llama_stack.apis.common.responses import Order
from llama_stack.apis.models import Model
from llama_stack.apis.telemetry import MetricResponseMixin
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
from llama_stack.core.telemetry.telemetry import MetricResponseMixin
from llama_stack.core.telemetry.trace_protocol import trace_protocol
from llama_stack.models.llama.datatypes import (
BuiltinTool,
@ -97,7 +97,7 @@ class SamplingParams(BaseModel):
strategy: SamplingStrategy = Field(default_factory=GreedySamplingStrategy)
max_tokens: int | None = 0
max_tokens: int | None = None
repetition_penalty: float | None = 1.0
stop: list[str] | None = None
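Moving the max_tokens default from 0 to None makes "unset" distinguishable from an explicit budget, so a provider can fall back to its own default instead of treating 0 literally. A sketch under that assumption; to_provider_kwargs is a hypothetical helper, not part of this diff.

params = SamplingParams()                 # max_tokens is None: defer to the provider default
capped = SamplingParams(max_tokens=256)   # explicit output cap

def to_provider_kwargs(p: SamplingParams) -> dict:
    # Only forward a limit when the caller actually set one.
    kwargs: dict = {}
    if p.max_tokens is not None:
        kwargs["max_tokens"] = p.max_tokens
    return kwargs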

Some files were not shown because too many files have changed in this diff.