Merge remote-tracking branch 'upstream/main' into elasticsearch-integration

Enrico Zimuel 2025-10-31 18:23:42 +01:00
commit 2407115ee8
1050 changed files with 65153 additions and 2821 deletions


@@ -0,0 +1,64 @@
name: Install llama-stack-client
description: Install llama-stack-client based on branch context and client-version input
inputs:
client-version:
description: 'Client version to install on non-release branches (latest or published). Ignored on release branches.'
required: false
default: ""
outputs:
uv-index-url:
description: 'UV_INDEX_URL to use (set for release branches)'
value: ${{ steps.configure.outputs.uv-index-url }}
uv-extra-index-url:
description: 'UV_EXTRA_INDEX_URL to use (set for release branches)'
value: ${{ steps.configure.outputs.uv-extra-index-url }}
install-after-sync:
description: 'Whether to install client after uv sync'
value: ${{ steps.configure.outputs.install-after-sync }}
install-source:
description: 'Where to install client from after sync'
value: ${{ steps.configure.outputs.install-source }}
runs:
using: "composite"
steps:
- name: Configure client installation
id: configure
shell: bash
run: |
# Determine the branch we're working with
BRANCH="${{ github.base_ref || github.ref }}"
BRANCH="${BRANCH#refs/heads/}"
echo "Working with branch: $BRANCH"
# On release branches: use test.pypi for uv sync, then install from git
# On non-release branches: install based on client-version after sync
if [[ "$BRANCH" =~ ^release-[0-9]+\.[0-9]+\.x$ ]]; then
echo "Detected release branch: $BRANCH"
# Check if matching branch exists in client repo
if ! git ls-remote --exit-code --heads https://github.com/llamastack/llama-stack-client-python.git "$BRANCH" > /dev/null 2>&1; then
echo "::error::Branch $BRANCH not found in llama-stack-client-python repository"
echo "::error::Please create the matching release branch in llama-stack-client-python before testing"
exit 1
fi
# Configure to use test.pypi for sync (to resolve RC versions)
echo "uv-index-url=https://test.pypi.org/simple/" >> $GITHUB_OUTPUT
echo "uv-extra-index-url=https://pypi.org/simple/" >> $GITHUB_OUTPUT
echo "install-after-sync=true" >> $GITHUB_OUTPUT
echo "install-source=git+https://github.com/llamastack/llama-stack-client-python.git@$BRANCH" >> $GITHUB_OUTPUT
elif [ "${{ inputs.client-version }}" = "latest" ]; then
# Install from main git after sync
echo "install-after-sync=true" >> $GITHUB_OUTPUT
echo "install-source=git+https://github.com/llamastack/llama-stack-client-python.git@main" >> $GITHUB_OUTPUT
elif [ "${{ inputs.client-version }}" = "published" ]; then
# Use published version from PyPI (installed by sync)
echo "install-after-sync=false" >> $GITHUB_OUTPUT
elif [ -n "${{ inputs.client-version }}" ]; then
echo "::error::Invalid client-version: ${{ inputs.client-version }}"
exit 1
fi
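
A minimal sketch of how a workflow step could consume this composite action, assuming the surrounding job has already checked out the repo and installed uv; the step names are illustrative, but the action path, input, and outputs match the definition above (the setup-runner and pre-commit changes later in this commit follow the same pattern):

```yaml
- name: Configure client installation
  id: client-config
  uses: ./.github/actions/install-llama-stack-client
  with:
    client-version: latest   # "latest", "published", or empty

- name: Install dependencies
  shell: bash
  env:
    # Only set on release branches, where uv sync must resolve RC versions from test.pypi
    UV_INDEX_URL: ${{ steps.client-config.outputs.uv-index-url }}
    UV_EXTRA_INDEX_URL: ${{ steps.client-config.outputs.uv-extra-index-url }}
  run: |
    uv sync --all-groups
    # Override the synced client with a git build when the action asks for it
    if [ "${{ steps.client-config.outputs.install-after-sync }}" = "true" ]; then
      uv pip install ${{ steps.client-config.outputs.install-source }}
    fi
```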


@@ -94,7 +94,7 @@ runs:
  if: ${{ always() }}
  uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
  with:
- name: logs-${{ github.run_id }}-${{ github.run_attempt || '' }}-${{ strategy.job-index }}
+ name: logs-${{ github.run_id }}-${{ github.run_attempt || '1' }}-${{ strategy.job-index || github.job }}-${{ github.action }}
  path: |
  *.log
  retention-days: 1


@@ -18,8 +18,17 @@ runs:
  python-version: ${{ inputs.python-version }}
  version: 0.7.6
+ - name: Configure client installation
+ id: client-config
+ uses: ./.github/actions/install-llama-stack-client
+ with:
+ client-version: ${{ inputs.client-version }}
  - name: Install dependencies
  shell: bash
+ env:
+ UV_INDEX_URL: ${{ steps.client-config.outputs.uv-index-url }}
+ UV_EXTRA_INDEX_URL: ${{ steps.client-config.outputs.uv-extra-index-url }}
  run: |
  echo "Updating project dependencies via uv sync"
  uv sync --all-groups
@@ -27,16 +36,10 @@ runs:
  echo "Installing ad-hoc dependencies"
  uv pip install faiss-cpu
- # Install llama-stack-client-python based on the client-version input
+ # Install specific client version after sync if needed
- if [ "${{ inputs.client-version }}" = "latest" ]; then
+ if [ "${{ steps.client-config.outputs.install-after-sync }}" = "true" ]; then
- echo "Installing latest llama-stack-client-python from main branch"
+ echo "Installing llama-stack-client from: ${{ steps.client-config.outputs.install-source }}"
- uv pip install git+https://github.com/llamastack/llama-stack-client-python.git@main
+ uv pip install ${{ steps.client-config.outputs.install-source }}
- elif [ "${{ inputs.client-version }}" = "published" ]; then
- echo "Installing published llama-stack-client-python from PyPI"
- uv pip install llama-stack-client
- else
- echo "Invalid client-version: ${{ inputs.client-version }}"
- exit 1
  fi
  echo "Installed llama packages"


@@ -42,18 +42,7 @@ runs:
  - name: Build Llama Stack
  shell: bash
  run: |
- # Install llama-stack-client-python based on the client-version input
+ # Client is already installed by setup-runner (handles both main and release branches)
- if [ "${{ inputs.client-version }}" = "latest" ]; then
- echo "Installing latest llama-stack-client-python from main branch"
- export LLAMA_STACK_CLIENT_DIR=git+https://github.com/llamastack/llama-stack-client-python.git@main
- elif [ "${{ inputs.client-version }}" = "published" ]; then
- echo "Installing published llama-stack-client-python from PyPI"
- unset LLAMA_STACK_CLIENT_DIR
- else
- echo "Invalid client-version: ${{ inputs.client-version }}"
- exit 1
- fi
  echo "Building Llama Stack"
  LLAMA_STACK_DIR=. \


@@ -4,6 +4,7 @@ Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a tabl
  | Name | File | Purpose |
  | ---- | ---- | ------- |
+ | Backward Compatibility Check | [backward-compat.yml](backward-compat.yml) | Check backward compatibility for run.yaml configs |
  | Update Changelog | [changelog.yml](changelog.yml) | Creates PR for updating the CHANGELOG.md |
  | API Conformance Tests | [conformance.yml](conformance.yml) | Run the API Conformance test suite on the changes. |
  | Installer CI | [install-script-ci.yml](install-script-ci.yml) | Test the installation script |

.github/workflows/backward-compat.yml

@@ -0,0 +1,578 @@
name: Backward Compatibility Check
run-name: Check backward compatibility for run.yaml configs
on:
pull_request:
branches:
- main
- 'release-[0-9]+.[0-9]+.[0-9]+.[0-9]+'
- 'release-[0-9]+.[0-9]+.[0-9]+'
- 'release-[0-9]+.[0-9]+'
paths:
- 'src/llama_stack/core/datatypes.py'
- 'src/llama_stack/providers/datatypes.py'
- 'src/llama_stack/distributions/**/run.yaml'
- 'tests/backward_compat/**'
- '.github/workflows/backward-compat.yml'
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
check-main-compatibility:
name: Check Compatibility with main
runs-on: ubuntu-latest
steps:
- name: Checkout PR branch
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with:
fetch-depth: 0 # Need full history to access main branch
- name: Set up Python
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
with:
python-version: '3.12'
- name: Install uv
uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1
with:
enable-cache: true
- name: Install dependencies
run: |
uv sync --group dev
- name: Extract run.yaml files from main branch
id: extract_configs
run: |
# Get list of run.yaml paths from main
git fetch origin main
CONFIG_PATHS=$(git ls-tree -r --name-only origin/main | grep "src/llama_stack/distributions/.*/run.yaml$" || true)
if [ -z "$CONFIG_PATHS" ]; then
echo "No run.yaml files found in main branch"
exit 1
fi
# Extract all configs to a temp directory
mkdir -p /tmp/main_configs
echo "Extracting configs from main branch:"
while IFS= read -r config_path; do
if [ -z "$config_path" ]; then
continue
fi
# Extract filename for storage
filename=$(basename $(dirname "$config_path"))
echo " - $filename (from $config_path)"
git show origin/main:"$config_path" > "/tmp/main_configs/${filename}.yaml"
done <<< "$CONFIG_PATHS"
echo ""
echo "Extracted $(ls /tmp/main_configs/*.yaml | wc -l) config files"
- name: Test all configs from main
id: test_configs
continue-on-error: true
run: |
# Run pytest once with all configs parameterized
if COMPAT_TEST_CONFIGS_DIR=/tmp/main_configs uv run pytest tests/backward_compat/test_run_config.py -v; then
echo "failed=false" >> $GITHUB_OUTPUT
else
echo "failed=true" >> $GITHUB_OUTPUT
exit 1
fi
- name: Check for breaking change acknowledgment
id: check_ack
if: steps.test_configs.outputs.failed == 'true'
run: |
echo "Breaking changes detected. Checking for acknowledgment..."
# Check PR title for '!:' marker (conventional commits)
PR_TITLE="${{ github.event.pull_request.title }}"
if [[ "$PR_TITLE" =~ ^[a-z]+\!: ]]; then
echo "✓ Breaking change acknowledged in PR title"
echo "acknowledged=true" >> $GITHUB_OUTPUT
exit 0
fi
# Check commit messages for BREAKING CHANGE:
if git log origin/main..HEAD --format=%B | grep -q "BREAKING CHANGE:"; then
echo "✓ Breaking change acknowledged in commit message"
echo "acknowledged=true" >> $GITHUB_OUTPUT
exit 0
fi
echo "✗ Breaking change NOT acknowledged"
echo "acknowledged=false" >> $GITHUB_OUTPUT
env:
GH_TOKEN: ${{ github.token }}
- name: Evaluate results
if: always()
run: |
FAILED="${{ steps.test_configs.outputs.failed }}"
ACKNOWLEDGED="${{ steps.check_ack.outputs.acknowledged }}"
if [[ "$FAILED" == "true" ]]; then
if [[ "$ACKNOWLEDGED" == "true" ]]; then
echo ""
echo "⚠️ WARNING: Breaking changes detected but acknowledged"
echo ""
echo "This PR introduces backward-incompatible changes to run.yaml."
echo "The changes have been properly acknowledged."
echo ""
exit 0 # Pass the check
else
echo ""
echo "❌ ERROR: Breaking changes detected without acknowledgment"
echo ""
echo "This PR introduces backward-incompatible changes to run.yaml"
echo "that will break existing user configurations."
echo ""
echo "To acknowledge this breaking change, do ONE of:"
echo " 1. Add '!:' to your PR title (e.g., 'feat!: change xyz')"
echo " 2. Add the 'breaking-change' label to this PR"
echo " 3. Include 'BREAKING CHANGE:' in a commit message"
echo ""
exit 1 # Fail the check
fi
fi
test-integration-main:
name: Run Integration Tests with main Config
runs-on: ubuntu-latest
steps:
- name: Checkout PR branch
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with:
fetch-depth: 0
- name: Extract ci-tests run.yaml from main
run: |
git fetch origin main
git show origin/main:src/llama_stack/distributions/ci-tests/run.yaml > /tmp/main-ci-tests-run.yaml
echo "Extracted ci-tests run.yaml from main branch"
- name: Setup test environment
uses: ./.github/actions/setup-test-environment
with:
python-version: '3.12'
client-version: 'latest'
setup: 'ollama'
suite: 'base'
inference-mode: 'replay'
- name: Run integration tests with main config
id: test_integration
continue-on-error: true
uses: ./.github/actions/run-and-record-tests
with:
stack-config: /tmp/main-ci-tests-run.yaml
setup: 'ollama'
inference-mode: 'replay'
suite: 'base'
- name: Check for breaking change acknowledgment
id: check_ack
if: steps.test_integration.outcome == 'failure'
run: |
echo "Integration tests failed. Checking for acknowledgment..."
# Check PR title for '!:' marker (conventional commits)
PR_TITLE="${{ github.event.pull_request.title }}"
if [[ "$PR_TITLE" =~ ^[a-z]+\!: ]]; then
echo "✓ Breaking change acknowledged in PR title"
echo "acknowledged=true" >> $GITHUB_OUTPUT
exit 0
fi
# Check commit messages for BREAKING CHANGE:
if git log origin/main..HEAD --format=%B | grep -q "BREAKING CHANGE:"; then
echo "✓ Breaking change acknowledged in commit message"
echo "acknowledged=true" >> $GITHUB_OUTPUT
exit 0
fi
echo "✗ Breaking change NOT acknowledged"
echo "acknowledged=false" >> $GITHUB_OUTPUT
env:
GH_TOKEN: ${{ github.token }}
- name: Evaluate integration test results
if: always()
run: |
TEST_FAILED="${{ steps.test_integration.outcome == 'failure' }}"
ACKNOWLEDGED="${{ steps.check_ack.outputs.acknowledged }}"
if [[ "$TEST_FAILED" == "true" ]]; then
if [[ "$ACKNOWLEDGED" == "true" ]]; then
echo ""
echo "⚠️ WARNING: Integration tests failed with main config but acknowledged"
echo ""
exit 0 # Pass the check
else
echo ""
echo "❌ ERROR: Integration tests failed with main config without acknowledgment"
echo ""
echo "To acknowledge this breaking change, do ONE of:"
echo " 1. Add '!:' to your PR title (e.g., 'feat!: change xyz')"
echo " 2. Include 'BREAKING CHANGE:' in a commit message"
echo ""
exit 1 # Fail the check
fi
fi
test-integration-release:
name: Run Integration Tests with Latest Release (Informational)
runs-on: ubuntu-latest
steps:
- name: Checkout PR branch
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with:
fetch-depth: 0
- name: Get latest release
id: get_release
run: |
# Get the latest release from GitHub
LATEST_TAG=$(gh release list --limit 1 --json tagName --jq '.[0].tagName' 2>/dev/null || echo "")
if [ -z "$LATEST_TAG" ]; then
echo "No releases found, skipping release compatibility check"
echo "has_release=false" >> $GITHUB_OUTPUT
exit 0
fi
echo "Latest release: $LATEST_TAG"
echo "has_release=true" >> $GITHUB_OUTPUT
echo "tag=$LATEST_TAG" >> $GITHUB_OUTPUT
env:
GH_TOKEN: ${{ github.token }}
- name: Extract ci-tests run.yaml from release
if: steps.get_release.outputs.has_release == 'true'
id: extract_config
run: |
RELEASE_TAG="${{ steps.get_release.outputs.tag }}"
# Try with src/ prefix first (newer releases), then without (older releases)
if git show "$RELEASE_TAG:src/llama_stack/distributions/ci-tests/run.yaml" > /tmp/release-ci-tests-run.yaml 2>/dev/null; then
echo "Extracted ci-tests run.yaml from release $RELEASE_TAG (src/ path)"
echo "has_config=true" >> $GITHUB_OUTPUT
elif git show "$RELEASE_TAG:llama_stack/distributions/ci-tests/run.yaml" > /tmp/release-ci-tests-run.yaml 2>/dev/null; then
echo "Extracted ci-tests run.yaml from release $RELEASE_TAG (old path)"
echo "has_config=true" >> $GITHUB_OUTPUT
else
echo "::warning::ci-tests/run.yaml not found in release $RELEASE_TAG"
echo "has_config=false" >> $GITHUB_OUTPUT
fi
- name: Setup test environment
if: steps.get_release.outputs.has_release == 'true' && steps.extract_config.outputs.has_config == 'true'
uses: ./.github/actions/setup-test-environment
with:
python-version: '3.12'
client-version: 'latest'
setup: 'ollama'
suite: 'base'
inference-mode: 'replay'
- name: Run integration tests with release config (PR branch)
id: test_release_pr
if: steps.get_release.outputs.has_release == 'true' && steps.extract_config.outputs.has_config == 'true'
continue-on-error: true
uses: ./.github/actions/run-and-record-tests
with:
stack-config: /tmp/release-ci-tests-run.yaml
setup: 'ollama'
inference-mode: 'replay'
suite: 'base'
- name: Checkout main branch to test baseline
if: steps.get_release.outputs.has_release == 'true' && steps.extract_config.outputs.has_config == 'true'
run: |
git checkout origin/main
- name: Setup test environment for main
if: steps.get_release.outputs.has_release == 'true' && steps.extract_config.outputs.has_config == 'true'
uses: ./.github/actions/setup-test-environment
with:
python-version: '3.12'
client-version: 'latest'
setup: 'ollama'
suite: 'base'
inference-mode: 'replay'
- name: Run integration tests with release config (main branch)
id: test_release_main
if: steps.get_release.outputs.has_release == 'true' && steps.extract_config.outputs.has_config == 'true'
continue-on-error: true
uses: ./.github/actions/run-and-record-tests
with:
stack-config: /tmp/release-ci-tests-run.yaml
setup: 'ollama'
inference-mode: 'replay'
suite: 'base'
- name: Report results and post PR comment
if: always() && steps.get_release.outputs.has_release == 'true' && steps.extract_config.outputs.has_config == 'true'
run: |
RELEASE_TAG="${{ steps.get_release.outputs.tag }}"
PR_OUTCOME="${{ steps.test_release_pr.outcome }}"
MAIN_OUTCOME="${{ steps.test_release_main.outcome }}"
if [[ "$PR_OUTCOME" == "failure" && "$MAIN_OUTCOME" == "success" ]]; then
# NEW breaking change - PR fails but main passes
echo "::error::🚨 This PR introduces a NEW breaking change!"
# Check if we already posted a comment (to avoid spam on every push)
EXISTING_COMMENT=$(gh pr view ${{ github.event.pull_request.number }} --json comments --jq '.comments[] | select(.body | contains("🚨 New Breaking Change Detected") and contains("Integration tests")) | .id' | head -1)
if [[ -z "$EXISTING_COMMENT" ]]; then
gh pr comment ${{ github.event.pull_request.number }} --body "## 🚨 New Breaking Change Detected
**Integration tests against release \`$RELEASE_TAG\` are now failing**
⚠️ This PR introduces a breaking change that affects compatibility with the latest release.
- Users on release \`$RELEASE_TAG\` may not be able to upgrade
- Existing configurations may break
The tests pass on \`main\` but fail with this PR's changes.
> **Note:** This is informational only and does not block merge.
> Consider whether this breaking change is acceptable for users."
else
echo "Comment already exists, skipping to avoid spam"
fi
cat >> $GITHUB_STEP_SUMMARY <<EOF
## 🚨 NEW Breaking Change Detected
**Integration tests against release \`$RELEASE_TAG\` FAILED**
⚠️ **This PR introduces a NEW breaking change**
- Tests **PASS** on main branch ✅
- Tests **FAIL** on PR branch ❌
- Users on release \`$RELEASE_TAG\` may not be able to upgrade
- Existing configurations may break
> **Note:** This is informational only and does not block merge.
> Consider whether this breaking change is acceptable for users.
EOF
elif [[ "$PR_OUTCOME" == "failure" ]]; then
# Existing breaking change - both PR and main fail
echo "::warning::Breaking change already exists in main branch"
cat >> $GITHUB_STEP_SUMMARY <<EOF
## ⚠️ Release Compatibility Test Failed (Existing Issue)
**Integration tests against release \`$RELEASE_TAG\` FAILED**
- Tests **FAIL** on main branch ❌
- Tests **FAIL** on PR branch ❌
- This breaking change already exists in main (not introduced by this PR)
> **Note:** This is informational only.
EOF
else
# Success - tests pass
cat >> $GITHUB_STEP_SUMMARY <<EOF
## ✅ Release Compatibility Test Passed
Integration tests against release \`$RELEASE_TAG\` passed successfully.
This PR maintains compatibility with the latest release.
EOF
fi
env:
GH_TOKEN: ${{ github.token }}
check-schema-release-compatibility:
name: Check Schema Compatibility with Latest Release (Informational)
runs-on: ubuntu-latest
steps:
- name: Checkout PR branch
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
with:
fetch-depth: 0
- name: Set up Python
uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
with:
python-version: '3.12'
- name: Install uv
uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1
with:
enable-cache: true
- name: Install dependencies
run: |
uv sync --group dev
- name: Get latest release
id: get_release
run: |
# Get the latest release from GitHub
LATEST_TAG=$(gh release list --limit 1 --json tagName --jq '.[0].tagName' 2>/dev/null || echo "")
if [ -z "$LATEST_TAG" ]; then
echo "No releases found, skipping release compatibility check"
echo "has_release=false" >> $GITHUB_OUTPUT
exit 0
fi
echo "Latest release: $LATEST_TAG"
echo "has_release=true" >> $GITHUB_OUTPUT
echo "tag=$LATEST_TAG" >> $GITHUB_OUTPUT
env:
GH_TOKEN: ${{ github.token }}
- name: Extract configs from release
if: steps.get_release.outputs.has_release == 'true'
id: extract_release_configs
run: |
RELEASE_TAG="${{ steps.get_release.outputs.tag }}"
# Get run.yaml files from the release (try both src/ and old path)
CONFIG_PATHS=$(git ls-tree -r --name-only "$RELEASE_TAG" | grep "llama_stack/distributions/.*/run.yaml$" || true)
if [ -z "$CONFIG_PATHS" ]; then
echo "::warning::No run.yaml files found in release $RELEASE_TAG"
echo "has_configs=false" >> $GITHUB_OUTPUT
exit 0
fi
# Extract all configs to a temp directory
mkdir -p /tmp/release_configs
echo "Extracting configs from release $RELEASE_TAG:"
while IFS= read -r config_path; do
if [ -z "$config_path" ]; then
continue
fi
filename=$(basename $(dirname "$config_path"))
echo " - $filename (from $config_path)"
git show "$RELEASE_TAG:$config_path" > "/tmp/release_configs/${filename}.yaml" 2>/dev/null || true
done <<< "$CONFIG_PATHS"
echo ""
echo "Extracted $(ls /tmp/release_configs/*.yaml 2>/dev/null | wc -l) config files"
echo "has_configs=true" >> $GITHUB_OUTPUT
- name: Test against release configs (PR branch)
id: test_schema_pr
if: steps.get_release.outputs.has_release == 'true' && steps.extract_release_configs.outputs.has_configs == 'true'
continue-on-error: true
run: |
RELEASE_TAG="${{ steps.get_release.outputs.tag }}"
COMPAT_TEST_CONFIGS_DIR=/tmp/release_configs uv run pytest tests/backward_compat/test_run_config.py -v --tb=short
- name: Checkout main branch to test baseline
if: steps.get_release.outputs.has_release == 'true' && steps.extract_release_configs.outputs.has_configs == 'true'
run: |
git checkout origin/main
- name: Install dependencies for main
if: steps.get_release.outputs.has_release == 'true' && steps.extract_release_configs.outputs.has_configs == 'true'
run: |
uv sync --group dev
- name: Test against release configs (main branch)
id: test_schema_main
if: steps.get_release.outputs.has_release == 'true' && steps.extract_release_configs.outputs.has_configs == 'true'
continue-on-error: true
run: |
RELEASE_TAG="${{ steps.get_release.outputs.tag }}"
COMPAT_TEST_CONFIGS_DIR=/tmp/release_configs uv run pytest tests/backward_compat/test_run_config.py -v --tb=short
- name: Report results and post PR comment
if: always() && steps.get_release.outputs.has_release == 'true' && steps.extract_release_configs.outputs.has_configs == 'true'
run: |
RELEASE_TAG="${{ steps.get_release.outputs.tag }}"
PR_OUTCOME="${{ steps.test_schema_pr.outcome }}"
MAIN_OUTCOME="${{ steps.test_schema_main.outcome }}"
if [[ "$PR_OUTCOME" == "failure" && "$MAIN_OUTCOME" == "success" ]]; then
# NEW breaking change - PR fails but main passes
echo "::error::🚨 This PR introduces a NEW schema breaking change!"
# Check if we already posted a comment (to avoid spam on every push)
EXISTING_COMMENT=$(gh pr view ${{ github.event.pull_request.number }} --json comments --jq '.comments[] | select(.body | contains("🚨 New Schema Breaking Change Detected")) | .id' | head -1)
if [[ -z "$EXISTING_COMMENT" ]]; then
gh pr comment ${{ github.event.pull_request.number }} --body "## 🚨 New Schema Breaking Change Detected
**Schema validation against release \`$RELEASE_TAG\` is now failing**
⚠️ This PR introduces a schema breaking change that affects compatibility with the latest release.
- Users on release \`$RELEASE_TAG\` will not be able to upgrade
- Existing run.yaml configurations will fail validation
The tests pass on \`main\` but fail with this PR's changes.
> **Note:** This is informational only and does not block merge.
> Consider whether this breaking change is acceptable for users."
else
echo "Comment already exists, skipping to avoid spam"
fi
cat >> $GITHUB_STEP_SUMMARY <<EOF
## 🚨 NEW Schema Breaking Change Detected
**Schema validation against release \`$RELEASE_TAG\` FAILED**
⚠️ **This PR introduces a NEW schema breaking change**
- Tests **PASS** on main branch ✅
- Tests **FAIL** on PR branch ❌
- Users on release \`$RELEASE_TAG\` will not be able to upgrade
- Existing run.yaml configurations will fail validation
> **Note:** This is informational only and does not block merge.
> Consider whether this breaking change is acceptable for users.
EOF
elif [[ "$PR_OUTCOME" == "failure" ]]; then
# Existing breaking change - both PR and main fail
echo "::warning::Schema breaking change already exists in main branch"
cat >> $GITHUB_STEP_SUMMARY <<EOF
## ⚠️ Release Schema Compatibility Failed (Existing Issue)
**Schema validation against release \`$RELEASE_TAG\` FAILED**
- Tests **FAIL** on main branch ❌
- Tests **FAIL** on PR branch ❌
- This schema breaking change already exists in main (not introduced by this PR)
> **Note:** This is informational only.
EOF
else
# Success - tests pass
cat >> $GITHUB_STEP_SUMMARY <<EOF
## ✅ Release Schema Compatibility Passed
All run.yaml configs from release \`$RELEASE_TAG\` are compatible.
This PR maintains backward compatibility with the latest release.
EOF
fi
env:
GH_TOKEN: ${{ github.token }}


@@ -4,13 +4,17 @@ run-name: Run the integration test suite with Kubernetes authentication
  on:
  push:
- branches: [ main ]
+ branches:
+ - main
+ - 'release-[0-9]+.[0-9]+.x'
  pull_request:
- branches: [ main ]
+ branches:
+ - main
+ - 'release-[0-9]+.[0-9]+.x'
  paths:
  - 'distributions/**'
- - 'llama_stack/**'
+ - 'src/llama_stack/**'
- - '!llama_stack/ui/**'
+ - '!src/llama_stack/ui/**'
  - 'tests/integration/**'
  - 'uv.lock'
  - 'pyproject.toml'
@@ -91,6 +95,9 @@ jobs:
  conversations:
  table_name: openai_conversations
  backend: sql_default
+ prompts:
+ namespace: prompts
+ backend: kv_default
  server:
  port: 8321
  EOF


@@ -4,11 +4,15 @@ run-name: Run the integration test suite with SqlStore
  on:
  push:
- branches: [ main ]
+ branches:
+ - main
+ - 'release-[0-9]+.[0-9]+.x'
  pull_request:
- branches: [ main ]
+ branches:
+ - main
+ - 'release-[0-9]+.[0-9]+.x'
  paths:
- - 'llama_stack/providers/utils/sqlstore/**'
+ - 'src/llama_stack/providers/utils/sqlstore/**'
  - 'tests/integration/sqlstore/**'
  - 'uv.lock'
  - 'pyproject.toml'
@@ -64,7 +68,7 @@ jobs:
  - name: Upload test logs
  if: ${{ always() }}
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+ uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
  with:
  name: postgres-test-logs-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.python-version }}
  path: |


@@ -4,13 +4,17 @@ run-name: Run the integration test suites from tests/integration in replay mode
  on:
  push:
- branches: [ main ]
+ branches:
+ - main
+ - 'release-[0-9]+.[0-9]+.x'
  pull_request:
- branches: [ main ]
+ branches:
+ - main
+ - 'release-[0-9]+.[0-9]+.x'
  types: [opened, synchronize, reopened]
  paths:
- - 'llama_stack/**'
+ - 'src/llama_stack/**'
- - '!llama_stack/ui/**'
+ - '!src/llama_stack/ui/**'
  - 'tests/**'
  - 'uv.lock'
  - 'pyproject.toml'
@@ -47,7 +51,7 @@ jobs:
  strategy:
  fail-fast: false
  matrix:
- client-type: [library, docker]
+ client-type: [library, docker, server]
  # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
  python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
  client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}


@@ -4,12 +4,16 @@ run-name: Run the integration test suite with various VectorIO providers
  on:
  push:
- branches: [ main ]
+ branches:
+ - main
+ - 'release-[0-9]+.[0-9]+.x'
  pull_request:
- branches: [ main ]
+ branches:
+ - main
+ - 'release-[0-9]+.[0-9]+.x'
  paths:
- - 'llama_stack/**'
+ - 'src/llama_stack/**'
- - '!llama_stack/ui/**'
+ - '!src/llama_stack/ui/**'
  - 'tests/integration/vector_io/**'
  - 'uv.lock'
  - 'pyproject.toml'
@@ -209,7 +213,7 @@ jobs:
  - name: Upload all logs to artifacts
  if: ${{ always() }}
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+ uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
  with:
  name: vector-io-logs-${{ github.run_id }}-${{ github.run_attempt }}-${{ env.SANITIZED_PROVIDER }}-${{ matrix.python-version }}
  path: |


@@ -5,7 +5,9 @@ run-name: Run pre-commit checks
  on:
  pull_request:
  push:
- branches: [main]
+ branches:
+ - main
+ - 'release-[0-9]+.[0-9]+.x'
  concurrency:
  group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}
@@ -41,25 +43,43 @@ jobs:
  with:
  node-version: '20'
  cache: 'npm'
- cache-dependency-path: 'llama_stack/ui/'
+ cache-dependency-path: 'src/llama_stack/ui/'
+ - name: Set up uv
+ uses: astral-sh/setup-uv@2ddd2b9cb38ad8efd50337e8ab201519a34c9f24 # v7.1.1
  - name: Install npm dependencies
  run: npm ci
- working-directory: llama_stack/ui
+ working-directory: src/llama_stack/ui
+ - name: Install pre-commit
+ run: python -m pip install pre-commit
+ - name: Cache pre-commit
+ uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4
+ with:
+ path: ~/.cache/pre-commit
+ key: pre-commit-3|${{ env.pythonLocation }}|${{ hashFiles('.pre-commit-config.yaml') }}
  - name: Run pre-commit
  id: precommit
- uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
- continue-on-error: true
+ run: |
+ set +e
+ pre-commit run --show-diff-on-failure --color=always --all-files 2>&1 | tee /tmp/precommit.log
+ status=${PIPESTATUS[0]}
+ echo "status=$status" >> $GITHUB_OUTPUT
+ exit 0
  env:
- SKIP: no-commit-to-branch
+ SKIP: no-commit-to-branch,mypy
  RUFF_OUTPUT_FORMAT: github
  - name: Check pre-commit results
- if: steps.precommit.outcome == 'failure'
+ if: steps.precommit.outputs.status != '0'
  run: |
  echo "::error::Pre-commit hooks failed. Please run 'pre-commit run --all-files' locally and commit the fixes."
- echo "::warning::Some pre-commit hooks failed. Check the output above for details."
+ echo ""
+ echo "Failed hooks output:"
+ cat /tmp/precommit.log
  exit 1
  - name: Debug
@@ -109,3 +129,30 @@ jobs:
  echo "$unstaged_files"
  exit 1
  fi
+ - name: Configure client installation
+ id: client-config
+ uses: ./.github/actions/install-llama-stack-client
+ - name: Sync dev + type_checking dependencies
+ env:
+ UV_INDEX_URL: ${{ steps.client-config.outputs.uv-index-url }}
+ UV_EXTRA_INDEX_URL: ${{ steps.client-config.outputs.uv-extra-index-url }}
+ run: |
+ uv sync --group dev --group type_checking
+ # Install specific client version after sync if needed
+ if [ "${{ steps.client-config.outputs.install-after-sync }}" = "true" ]; then
+ echo "Installing llama-stack-client from: ${{ steps.client-config.outputs.install-source }}"
+ uv pip install ${{ steps.client-config.outputs.install-source }}
+ fi
+ - name: Run mypy (full type_checking)
+ run: |
+ set +e
+ uv run --group dev --group type_checking mypy
+ status=$?
+ if [ $status -ne 0 ]; then
+ echo "::error::Full mypy failed. Reproduce locally with 'uv run pre-commit run mypy-full --hook-stage manual --all-files'."
+ fi
+ exit $status


@@ -145,12 +145,12 @@ jobs:
  with:
  node-version: '20'
  cache: 'npm'
- cache-dependency-path: 'llama_stack/ui/'
+ cache-dependency-path: 'src/llama_stack/ui/'
  - name: Install npm dependencies
  if: steps.check_author.outputs.authorized == 'true'
  run: npm ci
- working-directory: llama_stack/ui
+ working-directory: src/llama_stack/ui
  - name: Run pre-commit
  if: steps.check_author.outputs.authorized == 'true'


@@ -7,24 +7,24 @@ on:
  branches:
  - main
  paths:
- - 'llama_stack/cli/stack/build.py'
+ - 'src/llama_stack/cli/stack/build.py'
- - 'llama_stack/cli/stack/_build.py'
+ - 'src/llama_stack/cli/stack/_build.py'
- - 'llama_stack/core/build.*'
+ - 'src/llama_stack/core/build.*'
- - 'llama_stack/core/*.sh'
+ - 'src/llama_stack/core/*.sh'
  - '.github/workflows/providers-build.yml'
- - 'llama_stack/distributions/**'
+ - 'src/llama_stack/distributions/**'
  - 'pyproject.toml'
  - 'containers/Containerfile'
  - '.dockerignore'
  pull_request:
  paths:
- - 'llama_stack/cli/stack/build.py'
+ - 'src/llama_stack/cli/stack/build.py'
- - 'llama_stack/cli/stack/_build.py'
+ - 'src/llama_stack/cli/stack/_build.py'
- - 'llama_stack/core/build.*'
+ - 'src/llama_stack/core/build.*'
- - 'llama_stack/core/*.sh'
+ - 'src/llama_stack/core/*.sh'
  - '.github/workflows/providers-build.yml'
- - 'llama_stack/distributions/**'
+ - 'src/llama_stack/distributions/**'
  - 'pyproject.toml'
  - 'containers/Containerfile'
  - '.dockerignore'
@@ -45,7 +45,7 @@ jobs:
  - name: Generate Distribution List
  id: set-matrix
  run: |
- distros=$(ls llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
+ distros=$(ls src/llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
  echo "distros=$distros" >> "$GITHUB_OUTPUT"
  build:
@@ -107,13 +107,13 @@ jobs:
  - name: Build container image
  run: |
- BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "python:3.12-slim"' llama_stack/distributions/ci-tests/build.yaml)
+ BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "python:3.12-slim"' src/llama_stack/distributions/ci-tests/build.yaml)
  docker build . \
  -f containers/Containerfile \
  --build-arg INSTALL_MODE=editable \
  --build-arg DISTRO_NAME=ci-tests \
  --build-arg BASE_IMAGE="$BASE_IMAGE" \
- --build-arg RUN_CONFIG_PATH=/workspace/llama_stack/distributions/ci-tests/run.yaml \
+ --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/run.yaml \
  -t llama-stack:ci-tests
  - name: Inspect the container image entrypoint
@@ -143,17 +143,17 @@ jobs:
  run: |
  yq -i '
  .distribution_spec.container_image = "registry.access.redhat.com/ubi9:latest"
- ' llama_stack/distributions/ci-tests/build.yaml
+ ' src/llama_stack/distributions/ci-tests/build.yaml
  - name: Build UBI9 container image
  run: |
- BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "registry.access.redhat.com/ubi9:latest"' llama_stack/distributions/ci-tests/build.yaml)
+ BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "registry.access.redhat.com/ubi9:latest"' src/llama_stack/distributions/ci-tests/build.yaml)
  docker build . \
  -f containers/Containerfile \
  --build-arg INSTALL_MODE=editable \
  --build-arg DISTRO_NAME=ci-tests \
  --build-arg BASE_IMAGE="$BASE_IMAGE" \
- --build-arg RUN_CONFIG_PATH=/workspace/llama_stack/distributions/ci-tests/run.yaml \
+ --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/run.yaml \
  -t llama-stack:ci-tests-ubi9
  - name: Inspect UBI9 image


@@ -7,22 +7,22 @@ on:
  branches:
  - main
  paths:
- - 'llama_stack/cli/stack/list_deps.py'
+ - 'src/llama_stack/cli/stack/list_deps.py'
- - 'llama_stack/cli/stack/_list_deps.py'
+ - 'src/llama_stack/cli/stack/_list_deps.py'
- - 'llama_stack/core/build.*'
+ - 'src/llama_stack/core/build.*'
- - 'llama_stack/core/*.sh'
+ - 'src/llama_stack/core/*.sh'
  - '.github/workflows/providers-list-deps.yml'
- - 'llama_stack/templates/**'
+ - 'src/llama_stack/templates/**'
  - 'pyproject.toml'
  pull_request:
  paths:
- - 'llama_stack/cli/stack/list_deps.py'
+ - 'src/llama_stack/cli/stack/list_deps.py'
- - 'llama_stack/cli/stack/_list_deps.py'
+ - 'src/llama_stack/cli/stack/_list_deps.py'
- - 'llama_stack/core/build.*'
+ - 'src/llama_stack/core/build.*'
- - 'llama_stack/core/*.sh'
+ - 'src/llama_stack/core/*.sh'
  - '.github/workflows/providers-list-deps.yml'
- - 'llama_stack/templates/**'
+ - 'src/llama_stack/templates/**'
  - 'pyproject.toml'
  concurrency:
@@ -41,7 +41,7 @@ jobs:
  - name: Generate Distribution List
  id: set-matrix
  run: |
- distros=$(ls llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
+ distros=$(ls src/llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
  echo "distros=$distros" >> "$GITHUB_OUTPUT"
  list-deps:
@@ -102,4 +102,4 @@ jobs:
  USE_COPY_NOT_MOUNT: "true"
  LLAMA_STACK_DIR: "."
  run: |
- uv run llama stack list-deps llama_stack/distributions/ci-tests/build.yaml
+ uv run llama stack list-deps src/llama_stack/distributions/ci-tests/build.yaml


@@ -10,7 +10,7 @@ on:
  branches:
  - main
  paths-ignore:
- - 'llama_stack/ui/**'
+ - 'src/llama_stack/ui/**'
  jobs:
  build:
@@ -24,7 +24,7 @@ jobs:
  uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
  - name: Install uv
- uses: astral-sh/setup-uv@3259c6206f993105e3a61b142c2d97bf4b9ef83d # v7.1.0
+ uses: astral-sh/setup-uv@2ddd2b9cb38ad8efd50337e8ab201519a34c9f24 # v7.1.1
  with:
  python-version: ${{ matrix.python-version }}
  activate-environment: true

View file

@@ -8,7 +8,7 @@ on:
  pull_request:
  branches: [ main ]
  paths:
- - 'llama_stack/**'
+ - 'src/llama_stack/**'
  - 'tests/integration/**'
  - 'uv.lock'
  - 'pyproject.toml'
@@ -78,7 +78,7 @@ jobs:
  - name: Upload all logs to artifacts
  if: ${{ always() }}
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+ uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
  with:
  name: logs-${{ github.run_id }}-${{ github.run_attempt }}-external-provider-module-test
  path: |

View file

@@ -8,8 +8,8 @@ on:
  pull_request:
  branches: [ main ]
  paths:
- - 'llama_stack/**'
+ - 'src/llama_stack/**'
- - '!llama_stack/ui/**'
+ - '!src/llama_stack/ui/**'
  - 'tests/integration/**'
  - 'uv.lock'
  - 'pyproject.toml'
@@ -84,7 +84,7 @@ jobs:
  - name: Upload all logs to artifacts
  if: ${{ always() }}
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+ uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
  with:
  name: logs-${{ github.run_id }}-${{ github.run_attempt }}-external-test
  path: |


@@ -8,7 +8,7 @@ on:
  pull_request:
  branches: [ main ]
  paths:
- - 'llama_stack/ui/**'
+ - 'src/llama_stack/ui/**'
  - '.github/workflows/ui-unit-tests.yml' # This workflow
  workflow_dispatch:
@@ -33,22 +33,22 @@ jobs:
  with:
  node-version: ${{ matrix.node-version }}
  cache: 'npm'
- cache-dependency-path: 'llama_stack/ui/package-lock.json'
+ cache-dependency-path: 'src/llama_stack/ui/package-lock.json'
  - name: Install dependencies
- working-directory: llama_stack/ui
+ working-directory: src/llama_stack/ui
  run: npm ci
  - name: Run linting
- working-directory: llama_stack/ui
+ working-directory: src/llama_stack/ui
  run: npm run lint
  - name: Run format check
- working-directory: llama_stack/ui
+ working-directory: src/llama_stack/ui
  run: npm run format:check
  - name: Run unit tests
- working-directory: llama_stack/ui
+ working-directory: src/llama_stack/ui
  env:
  CI: true


@@ -4,12 +4,16 @@ run-name: Run the unit test suite
  on:
  push:
- branches: [ main ]
+ branches:
+ - main
+ - 'release-[0-9]+.[0-9]+.x'
  pull_request:
- branches: [ main ]
+ branches:
+ - main
+ - 'release-[0-9]+.[0-9]+.x'
  paths:
- - 'llama_stack/**'
+ - 'src/llama_stack/**'
- - '!llama_stack/ui/**'
+ - '!src/llama_stack/ui/**'
  - 'tests/unit/**'
  - 'uv.lock'
  - 'pyproject.toml'
@@ -45,7 +49,7 @@ jobs:
  - name: Upload test results
  if: always()
- uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+ uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
  with:
  name: test-results-${{ matrix.python }}
  path: |

.gitignore

@@ -32,3 +32,6 @@ CLAUDE.md
  docs/.docusaurus/
  docs/node_modules/
  docs/static/imported-files/
+ docs/docs/api-deprecated/
+ docs/docs/api-experimental/
+ docs/docs/api/


@@ -42,7 +42,7 @@ repos:
  hooks:
  - id: ruff
  args: [ --fix ]
- exclude: ^llama_stack/strong_typing/.*$
+ exclude: ^src/llama_stack/strong_typing/.*$
  - id: ruff-format
  - repo: https://github.com/adamchainz/blacken-docs
@@ -58,18 +58,27 @@ repos:
  - id: uv-lock
  - repo: https://github.com/pre-commit/mirrors-mypy
- rev: v1.16.1
+ rev: v1.18.2
  hooks:
  - id: mypy
  additional_dependencies:
  - uv==0.6.2
+ - mypy
  - pytest
  - rich
  - types-requests
  - pydantic
+ - httpx
  pass_filenames: false
+ - repo: local
+ hooks:
+ - id: mypy-full
+ name: mypy (full type_checking)
+ entry: uv run --group dev --group type_checking mypy
+ language: system
+ pass_filenames: false
+ stages: [manual]
  # - repo: https://github.com/tcort/markdown-link-check
  # rev: v3.11.2
  # hooks:
@@ -86,7 +95,7 @@ repos:
  language: python
  pass_filenames: false
  require_serial: true
- files: ^llama_stack/distributions/.*$|^llama_stack/providers/.*/inference/.*/models\.py$
+ files: ^src/llama_stack/distributions/.*$|^src/llama_stack/providers/.*/inference/.*/models\.py$
  - id: provider-codegen
  name: Provider Codegen
  additional_dependencies:
@@ -95,7 +104,7 @@ repos:
  language: python
  pass_filenames: false
  require_serial: true
- files: ^llama_stack/providers/.*$
+ files: ^src/llama_stack/providers/.*$
  - id: openapi-codegen
  name: API Spec Codegen
  additional_dependencies:
@@ -104,7 +113,7 @@ repos:
  language: python
  pass_filenames: false
  require_serial: true
- files: ^llama_stack/apis/|^docs/openapi_generator/
+ files: ^src/llama_stack/apis/|^docs/openapi_generator/
  - id: check-workflows-use-hashes
  name: Check GitHub Actions use SHA-pinned actions
  entry: ./scripts/check-workflows-use-hashes.sh
@@ -120,7 +129,7 @@ repos:
  pass_filenames: false
  require_serial: true
  always_run: true
- files: ^llama_stack/.*$
+ files: ^src/llama_stack/.*$
  - id: forbid-pytest-asyncio
  name: Block @pytest.mark.asyncio and @pytest_asyncio.fixture
  entry: bash
@@ -150,10 +159,9 @@ repos:
  name: Format & Lint UI
  entry: bash ./scripts/run-ui-linter.sh
  language: system
- files: ^llama_stack/ui/.*\.(ts|tsx)$
+ files: ^src/llama_stack/ui/.*\.(ts|tsx)$
  pass_filenames: false
  require_serial: true
  - id: check-log-usage
  name: Ensure 'llama_stack.log' usage for logging
  entry: bash
@@ -172,7 +180,23 @@ repos:
  exit 1
  fi
  exit 0
+ - id: fips-compliance
+ name: Ensure llama-stack remains FIPS compliant
+ entry: bash
+ language: system
+ types: [python]
+ pass_filenames: true
+ exclude: '^tests/.*$' # Exclude test dir as some safety tests used MD5
+ args:
+ - -c
+ - |
+ grep -EnH '^[^#]*\b(md5|sha1|uuid3|uuid5)\b' "$@" && {
+ echo;
+ echo "❌ Do not use any of the following functions: hashlib.md5, hashlib.sha1, uuid.uuid3, uuid.uuid5"
+ echo " These functions are not FIPS-compliant"
+ echo;
+ exit 1;
+ } || true
  ci:
  autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks
  autoupdate_commit_msg: ⬆ [pre-commit.ci] pre-commit autoupdate


@@ -61,6 +61,18 @@ uv run pre-commit run --all-files -v
  The `-v` (verbose) parameter is optional but often helpful for getting more information about any issues with that the pre-commit checks identify.
+ To run the expanded mypy configuration that CI enforces, use:
+ ```bash
+ uv run pre-commit run mypy-full --hook-stage manual --all-files
+ ```
+ or invoke mypy directly with all optional dependencies:
+ ```bash
+ uv run --group dev --group type_checking mypy
+ ```
  ```{caution}
  Before pushing your changes, make sure that the pre-commit hooks have passed successfully.
  ```


@@ -1,11 +1,11 @@
  include pyproject.toml
- include llama_stack/models/llama/llama3/tokenizer.model
+ include src/llama_stack/models/llama/llama3/tokenizer.model
- include llama_stack/models/llama/llama4/tokenizer.model
+ include src/llama_stack/models/llama/llama4/tokenizer.model
- include llama_stack/core/*.sh
+ include src/llama_stack/core/*.sh
- include llama_stack/cli/scripts/*.sh
+ include src/llama_stack/cli/scripts/*.sh
- include llama_stack/distributions/*/*.yaml
+ include src/llama_stack/distributions/*/*.yaml
- exclude llama_stack/distributions/ci-tests
+ exclude src/llama_stack/distributions/ci-tests
  include tests/integration/test_cases/inference/*.json
- include llama_stack/models/llama/*/*.md
+ include src/llama_stack/models/llama/*/*.md
- include llama_stack/tests/integration/*.jpg
+ include src/llama_stack/tests/integration/*.jpg
- prune llama_stack/distributions/ci-tests
+ prune src/llama_stack/distributions/ci-tests


@@ -44,14 +44,6 @@ data:
  db: ${env.POSTGRES_DB:=llamastack}
  user: ${env.POSTGRES_USER:=llamastack}
  password: ${env.POSTGRES_PASSWORD:=llamastack}
- files:
- - provider_id: meta-reference-files
- provider_type: inline::localfs
- config:
- storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
- metadata_store:
- type: sqlite
- db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
  safety:
  - provider_id: llama-guard
  provider_type: inline::llama-guard
@@ -115,13 +107,21 @@ data:
  db: ${env.POSTGRES_DB:=llamastack}
  user: ${env.POSTGRES_USER:=llamastack}
  password: ${env.POSTGRES_PASSWORD:=llamastack}
- references:
+ stores:
  metadata:
  backend: kv_default
  namespace: registry
  inference:
  backend: sql_default
  table_name: inference_store
+ max_write_queue_size: 10000
+ num_writers: 4
+ conversations:
+ backend: sql_default
+ table_name: openai_conversations
+ prompts:
+ backend: kv_default
+ namespace: prompts
  models:
  - metadata:
  embedding_dimension: 768


@@ -36,14 +36,6 @@ providers:
  persistence:
  namespace: vector_io::chroma_remote
  backend: kv_default
- files:
- - provider_id: meta-reference-files
- provider_type: inline::localfs
- config:
- storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
- metadata_store:
- table_name: files_metadata
- backend: sql_default
  safety:
  - provider_id: llama-guard
  provider_type: inline::llama-guard
@@ -108,6 +100,9 @@ storage:
  conversations:
  table_name: openai_conversations
  backend: sql_default
+ prompts:
+ namespace: prompts
+ backend: kv_default
  registered_resources:
  models:
  - metadata:


@@ -1,610 +0,0 @@
# yaml-language-server: $schema=https://app.stainlessapi.com/config-internal.schema.json
organization:
# Name of your organization or company, used to determine the name of the client
# and headings.
name: llama-stack-client
docs: https://llama-stack.readthedocs.io/en/latest/
contact: llamastack@meta.com
security:
- {}
- BearerAuth: []
security_schemes:
BearerAuth:
type: http
scheme: bearer
# `targets` define the output targets and their customization options, such as
# whether to emit the Node SDK and what it's package name should be.
targets:
node:
package_name: llama-stack-client
production_repo: llamastack/llama-stack-client-typescript
publish:
npm: false
python:
package_name: llama_stack_client
production_repo: llamastack/llama-stack-client-python
options:
use_uv: true
publish:
pypi: true
project_name: llama_stack_client
kotlin:
reverse_domain: com.llama_stack_client.api
production_repo: null
publish:
maven: false
go:
package_name: llama-stack-client
production_repo: llamastack/llama-stack-client-go
options:
enable_v2: true
back_compat_use_shared_package: false
# `client_settings` define settings for the API client, such as extra constructor
# arguments (used for authentication), retry behavior, idempotency, etc.
client_settings:
default_env_prefix: LLAMA_STACK_CLIENT
opts:
api_key:
type: string
read_env: LLAMA_STACK_CLIENT_API_KEY
auth: { security_scheme: BearerAuth }
nullable: true
# `environments` are a map of the name of the environment (e.g. "sandbox",
# "production") to the corresponding url to use.
environments:
production: http://any-hosted-llama-stack.com
# `pagination` defines [pagination schemes] which provides a template to match
# endpoints and generate next-page and auto-pagination helpers in the SDKs.
pagination:
- name: datasets_iterrows
type: offset
request:
dataset_id:
type: string
start_index:
type: integer
x-stainless-pagination-property:
purpose: offset_count_param
limit:
type: integer
response:
data:
type: array
items:
type: object
next_index:
type: integer
x-stainless-pagination-property:
purpose: offset_count_start_field
- name: openai_cursor_page
type: cursor
request:
limit:
type: integer
after:
type: string
x-stainless-pagination-property:
purpose: next_cursor_param
response:
data:
type: array
items: {}
has_more:
type: boolean
last_id:
type: string
x-stainless-pagination-property:
purpose: next_cursor_field
# `resources` define the structure and organziation for your API, such as how
# methods and models are grouped together and accessed. See the [configuration
# guide] for more information.
#
# [configuration guide]:
# https://app.stainlessapi.com/docs/guides/configure#resources
resources:
$shared:
models:
agent_config: AgentConfig
interleaved_content_item: InterleavedContentItem
interleaved_content: InterleavedContent
param_type: ParamType
safety_violation: SafetyViolation
sampling_params: SamplingParams
scoring_result: ScoringResult
message: Message
user_message: UserMessage
completion_message: CompletionMessage
tool_response_message: ToolResponseMessage
system_message: SystemMessage
tool_call: ToolCall
query_result: RAGQueryResult
document: RAGDocument
query_config: RAGQueryConfig
response_format: ResponseFormat
toolgroups:
models:
tool_group: ToolGroup
list_tool_groups_response: ListToolGroupsResponse
methods:
register: post /v1/toolgroups
get: get /v1/toolgroups/{toolgroup_id}
list: get /v1/toolgroups
unregister: delete /v1/toolgroups/{toolgroup_id}
tools:
methods:
get: get /v1/tools/{tool_name}
list:
endpoint: get /v1/tools
paginated: false
tool_runtime:
models:
tool_def: ToolDef
tool_invocation_result: ToolInvocationResult
methods:
list_tools:
endpoint: get /v1/tool-runtime/list-tools
paginated: false
invoke_tool: post /v1/tool-runtime/invoke
subresources:
rag_tool:
methods:
insert: post /v1/tool-runtime/rag-tool/insert
query: post /v1/tool-runtime/rag-tool/query
responses:
models:
response_object_stream: OpenAIResponseObjectStream
response_object: OpenAIResponseObject
methods:
create:
type: http
endpoint: post /v1/responses
streaming:
stream_event_model: responses.response_object_stream
param_discriminator: stream
retrieve: get /v1/responses/{response_id}
list:
type: http
endpoint: get /v1/responses
delete:
type: http
endpoint: delete /v1/responses/{response_id}
subresources:
input_items:
methods:
list:
type: http
endpoint: get /v1/responses/{response_id}/input_items
conversations:
models:
conversation_object: Conversation
methods:
create:
type: http
endpoint: post /v1/conversations
retrieve: get /v1/conversations/{conversation_id}
update:
type: http
endpoint: post /v1/conversations/{conversation_id}
delete:
type: http
endpoint: delete /v1/conversations/{conversation_id}
subresources:
items:
methods:
get:
type: http
endpoint: get /v1/conversations/{conversation_id}/items/{item_id}
list:
type: http
endpoint: get /v1/conversations/{conversation_id}/items
create:
type: http
endpoint: post /v1/conversations/{conversation_id}/items
inspect:
models:
healthInfo: HealthInfo
providerInfo: ProviderInfo
routeInfo: RouteInfo
versionInfo: VersionInfo
methods:
health: get /v1/health
version: get /v1/version
embeddings:
models:
create_embeddings_response: OpenAIEmbeddingsResponse
methods:
create: post /v1/embeddings
chat:
models:
chat_completion_chunk: OpenAIChatCompletionChunk
subresources:
completions:
methods:
create:
type: http
endpoint: post /v1/chat/completions
streaming:
stream_event_model: chat.chat_completion_chunk
param_discriminator: stream
list:
type: http
endpoint: get /v1/chat/completions
retrieve:
type: http
endpoint: get /v1/chat/completions/{completion_id}
completions:
methods:
create:
type: http
endpoint: post /v1/completions
streaming:
param_discriminator: stream
vector_io:
models:
queryChunksResponse: QueryChunksResponse
methods:
insert: post /v1/vector-io/insert
query: post /v1/vector-io/query
vector_stores:
models:
vector_store: VectorStoreObject
list_vector_stores_response: VectorStoreListResponse
vector_store_delete_response: VectorStoreDeleteResponse
vector_store_search_response: VectorStoreSearchResponsePage
methods:
create: post /v1/vector_stores
list:
endpoint: get /v1/vector_stores
retrieve: get /v1/vector_stores/{vector_store_id}
update: post /v1/vector_stores/{vector_store_id}
delete: delete /v1/vector_stores/{vector_store_id}
search: post /v1/vector_stores/{vector_store_id}/search
subresources:
files:
models:
vector_store_file: VectorStoreFileObject
methods:
list: get /v1/vector_stores/{vector_store_id}/files
retrieve: get /v1/vector_stores/{vector_store_id}/files/{file_id}
update: post /v1/vector_stores/{vector_store_id}/files/{file_id}
delete: delete /v1/vector_stores/{vector_store_id}/files/{file_id}
create: post /v1/vector_stores/{vector_store_id}/files
content: get /v1/vector_stores/{vector_store_id}/files/{file_id}/content
file_batches:
models:
vector_store_file_batches: VectorStoreFileBatchObject
list_vector_store_files_in_batch_response: VectorStoreFilesListInBatchResponse
methods:
create: post /v1/vector_stores/{vector_store_id}/file_batches
retrieve: get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}
list_files: get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files
cancel: post /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel
models:
models:
model: Model
list_models_response: ListModelsResponse
methods:
retrieve: get /v1/models/{model_id}
list:
endpoint: get /v1/models
paginated: false
register: post /v1/models
unregister: delete /v1/models/{model_id}
subresources:
openai:
methods:
list:
endpoint: get /v1/models
paginated: false
providers:
models:
list_providers_response: ListProvidersResponse
methods:
list:
endpoint: get /v1/providers
paginated: false
retrieve: get /v1/providers/{provider_id}
routes:
models:
list_routes_response: ListRoutesResponse
methods:
list:
endpoint: get /v1/inspect/routes
paginated: false
moderations:
models:
create_response: ModerationObject
methods:
create: post /v1/moderations
safety:
models:
run_shield_response: RunShieldResponse
methods:
run_shield: post /v1/safety/run-shield
shields:
models:
shield: Shield
list_shields_response: ListShieldsResponse
methods:
retrieve: get /v1/shields/{identifier}
list:
endpoint: get /v1/shields
paginated: false
register: post /v1/shields
delete: delete /v1/shields/{identifier}
synthetic_data_generation:
models:
syntheticDataGenerationResponse: SyntheticDataGenerationResponse
methods:
generate: post /v1/synthetic-data-generation/generate
telemetry:
models:
span_with_status: SpanWithStatus
trace: Trace
query_spans_response: QuerySpansResponse
event: Event
query_condition: QueryCondition
methods:
query_traces:
endpoint: post /v1alpha/telemetry/traces
skip_test_reason: 'unsupported query params in java / kotlin'
get_span_tree: post /v1alpha/telemetry/spans/{span_id}/tree
query_spans:
endpoint: post /v1alpha/telemetry/spans
skip_test_reason: 'unsupported query params in java / kotlin'
query_metrics:
endpoint: post /v1alpha/telemetry/metrics/{metric_name}
skip_test_reason: 'unsupported query params in java / kotlin'
# log_event: post /v1alpha/telemetry/events
save_spans_to_dataset: post /v1alpha/telemetry/spans/export
get_span: get /v1alpha/telemetry/traces/{trace_id}/spans/{span_id}
get_trace: get /v1alpha/telemetry/traces/{trace_id}
scoring:
methods:
score: post /v1/scoring/score
score_batch: post /v1/scoring/score-batch
scoring_functions:
methods:
retrieve: get /v1/scoring-functions/{scoring_fn_id}
list:
endpoint: get /v1/scoring-functions
paginated: false
register: post /v1/scoring-functions
models:
scoring_fn: ScoringFn
scoring_fn_params: ScoringFnParams
list_scoring_functions_response: ListScoringFunctionsResponse
benchmarks:
methods:
retrieve: get /v1alpha/eval/benchmarks/{benchmark_id}
list:
endpoint: get /v1alpha/eval/benchmarks
paginated: false
register: post /v1alpha/eval/benchmarks
models:
benchmark: Benchmark
list_benchmarks_response: ListBenchmarksResponse
files:
methods:
create: post /v1/files
list: get /v1/files
retrieve: get /v1/files/{file_id}
delete: delete /v1/files/{file_id}
content: get /v1/files/{file_id}/content
models:
file: OpenAIFileObject
list_files_response: ListOpenAIFileResponse
delete_file_response: OpenAIFileDeleteResponse
alpha:
subresources:
inference:
methods:
rerank: post /v1alpha/inference/rerank
post_training:
models:
algorithm_config: AlgorithmConfig
post_training_job: PostTrainingJob
list_post_training_jobs_response: ListPostTrainingJobsResponse
methods:
preference_optimize: post /v1alpha/post-training/preference-optimize
supervised_fine_tune: post /v1alpha/post-training/supervised-fine-tune
subresources:
job:
methods:
artifacts: get /v1alpha/post-training/job/artifacts
cancel: post /v1alpha/post-training/job/cancel
status: get /v1alpha/post-training/job/status
list:
endpoint: get /v1alpha/post-training/jobs
paginated: false
eval:
methods:
evaluate_rows: post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations
run_eval: post /v1alpha/eval/benchmarks/{benchmark_id}/jobs
evaluate_rows_alpha: post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations
run_eval_alpha: post /v1alpha/eval/benchmarks/{benchmark_id}/jobs
subresources:
jobs:
methods:
cancel: delete /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}
status: get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}
retrieve: get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result
models:
evaluate_response: EvaluateResponse
benchmark_config: BenchmarkConfig
job: Job
agents:
methods:
create: post /v1alpha/agents
list: get /v1alpha/agents
retrieve: get /v1alpha/agents/{agent_id}
delete: delete /v1alpha/agents/{agent_id}
models:
inference_step: InferenceStep
tool_execution_step: ToolExecutionStep
tool_response: ToolResponse
shield_call_step: ShieldCallStep
memory_retrieval_step: MemoryRetrievalStep
subresources:
session:
models:
session: Session
methods:
list: get /v1alpha/agents/{agent_id}/sessions
create: post /v1alpha/agents/{agent_id}/session
delete: delete /v1alpha/agents/{agent_id}/session/{session_id}
retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}
steps:
methods:
retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}
turn:
models:
turn: Turn
turn_response_event: AgentTurnResponseEvent
agent_turn_response_stream_chunk: AgentTurnResponseStreamChunk
methods:
create:
type: http
endpoint: post /v1alpha/agents/{agent_id}/session/{session_id}/turn
streaming:
stream_event_model: alpha.agents.turn.agent_turn_response_stream_chunk
param_discriminator: stream
retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}
resume:
type: http
endpoint: post /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume
streaming:
stream_event_model: alpha.agents.turn.agent_turn_response_stream_chunk
param_discriminator: stream
beta:
subresources:
datasets:
models:
list_datasets_response: ListDatasetsResponse
methods:
register: post /v1beta/datasets
retrieve: get /v1beta/datasets/{dataset_id}
list:
endpoint: get /v1beta/datasets
paginated: false
unregister: delete /v1beta/datasets/{dataset_id}
iterrows: get /v1beta/datasetio/iterrows/{dataset_id}
appendrows: post /v1beta/datasetio/append-rows/{dataset_id}
settings:
license: MIT
unwrap_response_fields: [ data ]
openapi:
transformations:
- command: renameValue
reason: pydantic reserved name
args:
filter:
only:
- '$.components.schemas.InferenceStep.properties.model_response'
rename:
python:
property_name: 'inference_model_response'
# - command: renameValue
# reason: pydantic reserved name
# args:
# filter:
# only:
# - '$.components.schemas.Model.properties.model_type'
# rename:
# python:
# property_name: 'type'
- command: mergeObject
reason: Better return_type using enum
args:
target:
- '$.components.schemas'
object:
ReturnType:
additionalProperties: false
properties:
type:
enum:
- string
- number
- boolean
- array
- object
- json
- union
- chat_completion_input
- completion_input
- agent_turn_input
required:
- type
type: object
- command: replaceProperties
reason: Replace return type properties with better model (see above)
args:
filter:
only:
- '$.components.schemas.ScoringFn.properties.return_type'
- '$.components.schemas.RegisterScoringFunctionRequest.properties.return_type'
value:
$ref: '#/components/schemas/ReturnType'
- command: oneOfToAnyOf
reason: Prism (mock server) doesn't like one of our requests as it technically matches multiple variants
- reason: For better names
command: extractToRefs
args:
ref:
target: '$.components.schemas.ToolCallDelta.properties.tool_call'
name: '#/components/schemas/ToolCallOrString'
# `readme` is used to configure the code snippets that will be rendered in the
# README.md of various SDKs. In particular, you can change the `headline`
# snippet's endpoint and the arguments to call it with.
readme:
example_requests:
default:
type: request
endpoint: post /v1/chat/completions
params: &ref_0 {}
headline:
type: request
endpoint: post /v1/models
params: *ref_0
pagination:
type: request
endpoint: post /v1/chat/completions
params: {}

View file

@ -15,6 +15,141 @@ info:
servers: servers:
- url: http://any-hosted-llama-stack.com - url: http://any-hosted-llama-stack.com
paths: paths:
/v1/batches:
get:
responses:
'200':
description: A list of batch objects.
content:
application/json:
schema:
$ref: '#/components/schemas/ListBatchesResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: List all batches for the current user.
description: List all batches for the current user.
parameters:
- name: after
in: query
description: >-
A cursor for pagination; returns batches after this batch ID.
required: false
schema:
type: string
- name: limit
in: query
description: >-
Number of batches to return (default 20, max 100).
required: true
schema:
type: integer
deprecated: false
post:
responses:
'200':
description: The created batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Create a new batch for processing multiple API requests.
description: >-
Create a new batch for processing multiple API requests.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CreateBatchRequest'
required: true
deprecated: false
/v1/batches/{batch_id}:
get:
responses:
'200':
description: The batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Retrieve information about a specific batch.
description: >-
Retrieve information about a specific batch.
parameters:
- name: batch_id
in: path
description: The ID of the batch to retrieve.
required: true
schema:
type: string
deprecated: false
/v1/batches/{batch_id}/cancel:
post:
responses:
'200':
description: The updated batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: Cancel a batch that is in progress.
description: Cancel a batch that is in progress.
parameters:
- name: batch_id
in: path
description: The ID of the batch to cancel.
required: true
schema:
type: string
deprecated: false
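  # Illustrative only (not part of the generated spec): these endpoints mirror the
  # OpenAI Batches API, so a typical call against a Llama Stack server is expected
  # to look roughly like
  #   batch = client.batches.create(
  #       input_file_id="file-abc123",
  #       endpoint="/v1/chat/completions",
  #       completion_window="24h",
  #   )
  # followed by polling client.batches.retrieve(batch.id) until the status is
  # "completed". Method names and identifiers here are assumptions based on
  # OpenAI-compatible clients, not taken from this spec.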
/v1/chat/completions: /v1/chat/completions:
get: get:
responses: responses:
@ -4212,6 +4347,331 @@ components:
title: Error title: Error
description: >- description: >-
Error response from the API. Roughly follows RFC 7807. Error response from the API. Roughly follows RFC 7807.
ListBatchesResponse:
type: object
properties:
object:
type: string
const: list
default: list
data:
type: array
items:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
first_id:
type: string
last_id:
type: string
has_more:
type: boolean
default: false
additionalProperties: false
required:
- object
- data
- has_more
title: ListBatchesResponse
description: >-
Response containing a list of batch objects.
CreateBatchRequest:
type: object
properties:
input_file_id:
type: string
description: >-
The ID of an uploaded file containing requests for the batch.
endpoint:
type: string
description: >-
The endpoint to be used for all requests in the batch.
completion_window:
type: string
const: 24h
description: >-
The time window within which the batch should be processed.
metadata:
type: object
additionalProperties:
type: string
description: Optional metadata for the batch.
idempotency_key:
type: string
description: >-
Optional idempotency key. When provided, enables idempotent behavior.
additionalProperties: false
required:
- input_file_id
- endpoint
- completion_window
title: CreateBatchRequest
Batch:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
Order: Order:
type: string type: string
enum: enum:
@ -5474,11 +5934,44 @@ components:
oneOf: oneOf:
- $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
- $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage'
- $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile'
discriminator: discriminator:
propertyName: type propertyName: type
mapping: mapping:
input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' input_text: '#/components/schemas/OpenAIResponseInputMessageContentText'
input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage'
input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile'
OpenAIResponseInputMessageContentFile:
type: object
properties:
type:
type: string
const: input_file
default: input_file
description: >-
The type of the input item. Always `input_file`.
file_data:
type: string
description: >-
The data of the file to be sent to the model.
file_id:
type: string
description: >-
(Optional) The ID of the file to be sent to the model.
file_url:
type: string
description: >-
The URL of the file to be sent to the model.
filename:
type: string
description: >-
The name of the file to be sent to the model.
additionalProperties: false
required:
- type
title: OpenAIResponseInputMessageContentFile
description: >-
File content for input messages in OpenAI response format.
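    # Illustrative only: in a request body, an `input_file` content item might look
    # like {"type": "input_file", "file_id": "file-abc123", "filename": "report.pdf"}.
    # The identifier and filename are placeholders; only the field names come from
    # the schema above.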
OpenAIResponseInputMessageContentImage: OpenAIResponseInputMessageContentImage:
type: object type: object
properties: properties:
@ -5499,6 +5992,10 @@ components:
default: input_image default: input_image
description: >- description: >-
Content type identifier, always "input_image" Content type identifier, always "input_image"
file_id:
type: string
description: >-
(Optional) The ID of the file to be sent to the model.
image_url: image_url:
type: string type: string
description: (Optional) URL of the image content description: (Optional) URL of the image content
@ -6735,14 +7232,9 @@ components:
Error details for failed OpenAI response requests. Error details for failed OpenAI response requests.
OpenAIResponseInput: OpenAIResponseInput:
oneOf: oneOf:
- $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' - $ref: '#/components/schemas/OpenAIResponseOutput'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
- $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
- $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
- $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
- $ref: '#/components/schemas/OpenAIResponseMessage' - $ref: '#/components/schemas/OpenAIResponseMessage'
OpenAIResponseInputToolFileSearch: OpenAIResponseInputToolFileSearch:
type: object type: object
@ -6898,6 +7390,10 @@ components:
type: string type: string
description: >- description: >-
(Optional) ID of the previous response in a conversation (Optional) ID of the previous response in a conversation
prompt:
$ref: '#/components/schemas/OpenAIResponsePrompt'
description: >-
(Optional) Reference to a prompt template and its variables.
status: status:
type: string type: string
description: >- description: >-
@ -6971,6 +7467,30 @@ components:
mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
OpenAIResponsePrompt:
type: object
properties:
id:
type: string
description: Unique identifier of the prompt template
variables:
type: object
additionalProperties:
$ref: '#/components/schemas/OpenAIResponseInputMessageContent'
description: >-
Dictionary of variable names to OpenAIResponseInputMessageContent structure
for template substitution. The substitution values can either be strings,
or other Response input types like images or files.
version:
type: string
description: >-
Version number of the prompt to use (defaults to latest if not specified)
additionalProperties: false
required:
- id
title: OpenAIResponsePrompt
description: >-
OpenAI compatible Prompt object that is used in OpenAI responses.
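    # Illustrative only: a create-response request referencing a stored prompt might
    # include
    #   prompt: {"id": "prompt_123", "version": "2",
    #            "variables": {"customer_name": {"type": "input_text", "text": "Ada"}}}
    # The prompt ID and variable values are placeholders; the shape follows the
    # schema above.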
OpenAIResponseText: OpenAIResponseText:
type: object type: object
properties: properties:
@ -7228,6 +7748,10 @@ components:
model: model:
type: string type: string
description: The underlying LLM used for completions. description: The underlying LLM used for completions.
prompt:
$ref: '#/components/schemas/OpenAIResponsePrompt'
description: >-
(Optional) Prompt object with ID, version, and variables.
instructions: instructions:
type: string type: string
previous_response_id: previous_response_id:
@ -7305,6 +7829,10 @@ components:
type: string type: string
description: >- description: >-
(Optional) ID of the previous response in a conversation (Optional) ID of the previous response in a conversation
prompt:
$ref: '#/components/schemas/OpenAIResponsePrompt'
description: >-
(Optional) Reference to a prompt template and its variables.
status: status:
type: string type: string
description: >- description: >-
@ -9867,7 +10395,7 @@ components:
$ref: '#/components/schemas/RAGDocument' $ref: '#/components/schemas/RAGDocument'
description: >- description: >-
List of documents to index in the RAG system List of documents to index in the RAG system
vector_db_id: vector_store_id:
type: string type: string
description: >- description: >-
ID of the vector database to store the document embeddings ID of the vector database to store the document embeddings
@ -9878,7 +10406,7 @@ components:
additionalProperties: false additionalProperties: false
required: required:
- documents - documents
- vector_db_id - vector_store_id
- chunk_size_in_tokens - chunk_size_in_tokens
title: InsertRequest title: InsertRequest
DefaultRAGQueryGeneratorConfig: DefaultRAGQueryGeneratorConfig:
@ -10049,7 +10577,7 @@ components:
$ref: '#/components/schemas/InterleavedContent' $ref: '#/components/schemas/InterleavedContent'
description: >- description: >-
The query content to search for in the indexed documents The query content to search for in the indexed documents
vector_db_ids: vector_store_ids:
type: array type: array
items: items:
type: string type: string
@ -10062,7 +10590,7 @@ components:
additionalProperties: false additionalProperties: false
required: required:
- content - content
- vector_db_ids - vector_store_ids
title: QueryRequest title: QueryRequest
RAGQueryResult: RAGQueryResult:
type: object type: object
@ -10190,6 +10718,10 @@ components:
description: >- description: >-
The content of the chunk, which can be interleaved text, images, or other The content of the chunk, which can be interleaved text, images, or other
types. types.
chunk_id:
type: string
description: >-
Unique identifier for the chunk. Must be provided explicitly.
metadata: metadata:
type: object type: object
additionalProperties: additionalProperties:
@ -10210,10 +10742,6 @@ components:
description: >- description: >-
Optional embedding for the chunk. If not provided, it will be computed Optional embedding for the chunk. If not provided, it will be computed
later. later.
stored_chunk_id:
type: string
description: >-
The chunk ID that is stored in the vector database. Used for backend functionality.
chunk_metadata: chunk_metadata:
$ref: '#/components/schemas/ChunkMetadata' $ref: '#/components/schemas/ChunkMetadata'
description: >- description: >-
@ -10222,6 +10750,7 @@ components:
additionalProperties: false additionalProperties: false
required: required:
- content - content
- chunk_id
- metadata - metadata
title: Chunk title: Chunk
description: >- description: >-
@ -10286,7 +10815,7 @@ components:
InsertChunksRequest: InsertChunksRequest:
type: object type: object
properties: properties:
vector_db_id: vector_store_id:
type: string type: string
description: >- description: >-
The identifier of the vector database to insert the chunks into. The identifier of the vector database to insert the chunks into.
@ -10305,13 +10834,13 @@ components:
description: The time to live of the chunks. description: The time to live of the chunks.
additionalProperties: false additionalProperties: false
required: required:
- vector_db_id - vector_store_id
- chunks - chunks
title: InsertChunksRequest title: InsertChunksRequest
QueryChunksRequest: QueryChunksRequest:
type: object type: object
properties: properties:
vector_db_id: vector_store_id:
type: string type: string
description: >- description: >-
The identifier of the vector database to query. The identifier of the vector database to query.
@ -10331,7 +10860,7 @@ components:
description: The parameters of the query. description: The parameters of the query.
additionalProperties: false additionalProperties: false
required: required:
- vector_db_id - vector_store_id
- query - query
title: QueryChunksRequest title: QueryChunksRequest
QueryChunksResponse: QueryChunksResponse:
@ -11600,7 +12129,6 @@ components:
description: The sampling strategy. description: The sampling strategy.
max_tokens: max_tokens:
type: integer type: integer
default: 0
description: >- description: >-
The maximum number of tokens that can be generated in the completion. The maximum number of tokens that can be generated in the completion.
The token count of your prompt plus max_tokens cannot exceed the model's The token count of your prompt plus max_tokens cannot exceed the model's
@ -11850,7 +12378,7 @@ components:
description: Type of the step in an agent turn. description: Type of the step in an agent turn.
const: memory_retrieval const: memory_retrieval
default: memory_retrieval default: memory_retrieval
vector_db_ids: vector_store_ids:
type: string type: string
description: >- description: >-
The IDs of the vector databases to retrieve context from. The IDs of the vector databases to retrieve context from.
@ -11863,7 +12391,7 @@ components:
- turn_id - turn_id
- step_id - step_id
- step_type - step_type
- vector_db_ids - vector_store_ids
- inserted_context - inserted_context
title: MemoryRetrievalStep title: MemoryRetrievalStep
description: >- description: >-
@ -13460,6 +13988,19 @@ tags:
description: >- description: >-
APIs for creating and interacting with agentic systems. APIs for creating and interacting with agentic systems.
x-displayName: Agents x-displayName: Agents
- name: Batches
description: >-
The API is designed to allow use of openai client libraries for seamless integration.
This API provides the following extensions:
- idempotent batch creation
Note: This API is currently under active development and may undergo changes.
x-displayName: >-
The Batches API enables efficient processing of multiple requests in a single
operation, particularly useful for processing large datasets, batch evaluation
workflows, and cost-effective inference at scale.
- name: Benchmarks - name: Benchmarks
description: '' description: ''
- name: Conversations - name: Conversations
@ -13534,6 +14075,7 @@ x-tagGroups:
- name: Operations - name: Operations
tags: tags:
- Agents - Agents
- Batches
- Benchmarks - Benchmarks
- Conversations - Conversations
- DatasetIO - DatasetIO

View file

@ -58,13 +58,21 @@ storage:
sql_default: sql_default:
type: sql_sqlite type: sql_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/sqlstore.db db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/sqlstore.db
references: stores:
metadata: metadata:
backend: kv_default backend: kv_default
namespace: registry namespace: registry
inference: inference:
backend: sql_default backend: sql_default
table_name: inference_store table_name: inference_store
max_write_queue_size: 10000
num_writers: 4
conversations:
backend: sql_default
table_name: openai_conversations
prompts:
backend: kv_default
namespace: prompts
models: models:
- metadata: {} - metadata: {}
model_id: ${env.INFERENCE_MODEL} model_id: ${env.INFERENCE_MODEL}

View file

@ -113,13 +113,21 @@ data:
db: ${env.POSTGRES_DB:=llamastack} db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack} user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack} password: ${env.POSTGRES_PASSWORD:=llamastack}
references: stores:
metadata: metadata:
backend: kv_default backend: kv_default
namespace: registry namespace: registry
inference: inference:
backend: sql_default backend: sql_default
table_name: inference_store table_name: inference_store
max_write_queue_size: 10000
num_writers: 4
conversations:
backend: sql_default
table_name: openai_conversations
prompts:
backend: kv_default
namespace: prompts
models: models:
- metadata: - metadata:
embedding_dimension: 768 embedding_dimension: 768

View file

@ -106,6 +106,9 @@ storage:
conversations: conversations:
table_name: openai_conversations table_name: openai_conversations
backend: sql_default backend: sql_default
prompts:
namespace: prompts
backend: kv_default
registered_resources: registered_resources:
models: models:
- metadata: - metadata:

View file

@ -79,6 +79,33 @@ docker run \
--port $LLAMA_STACK_PORT --port $LLAMA_STACK_PORT
``` ```
### Via Docker with Custom Run Configuration
You can also run the Docker container with a custom run configuration file by mounting it into the container:
```bash
# Set the path to your custom run.yaml file
CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
LLAMA_STACK_PORT=8321
docker run \
-it \
--pull always \
--gpus all \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ~/.llama:/root/.llama \
-v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
-e RUN_CONFIG_PATH=/app/custom-run.yaml \
llamastack/distribution-meta-reference-gpu \
--port $LLAMA_STACK_PORT
```
**Note**: The run configuration must be mounted into the container before it can be used. The `-v` flag mounts your local file into the container, and the `RUN_CONFIG_PATH` environment variable tells the entrypoint script which configuration to use.
Available run configurations for this distribution:
- `run.yaml`
- `run-with-safety.yaml`
### Via venv ### Via venv
Make sure you have the Llama Stack CLI available. Make sure you have the Llama Stack CLI available.

View file

@ -127,13 +127,39 @@ docker run \
-it \ -it \
--pull always \ --pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ./run.yaml:/root/my-run.yaml \ -v ~/.llama:/root/.llama \
-e NVIDIA_API_KEY=$NVIDIA_API_KEY \ -e NVIDIA_API_KEY=$NVIDIA_API_KEY \
llamastack/distribution-nvidia \ llamastack/distribution-nvidia \
--config /root/my-run.yaml \
--port $LLAMA_STACK_PORT --port $LLAMA_STACK_PORT
``` ```
### Via Docker with Custom Run Configuration
You can also run the Docker container with a custom run configuration file by mounting it into the container:
```bash
# Set the path to your custom run.yaml file
CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
LLAMA_STACK_PORT=8321
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ~/.llama:/root/.llama \
-v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
-e RUN_CONFIG_PATH=/app/custom-run.yaml \
-e NVIDIA_API_KEY=$NVIDIA_API_KEY \
llamastack/distribution-nvidia \
--port $LLAMA_STACK_PORT
```
**Note**: The run configuration must be mounted into the container before it can be used. The `-v` flag mounts your local file into the container, and the `RUN_CONFIG_PATH` environment variable tells the entrypoint script which configuration to use.
Available run configurations for this distribution:
- `run.yaml`
- `run-with-safety.yaml`
### Via venv ### Via venv
If you've set up your local development environment, you can also install the distribution dependencies using your local virtual environment. If you've set up your local development environment, you can also install the distribution dependencies using your local virtual environment.

View file

@ -0,0 +1,27 @@
---
description: "OpenAI Files API provider for managing files through OpenAI's native file storage service."
sidebar_label: Remote - Openai
title: remote::openai
---
# remote::openai
## Description
OpenAI Files API provider for managing files through OpenAI's native file storage service.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `<class 'str'>` | No | | OpenAI API key for authentication |
| `metadata_store` | `<class 'llama_stack.core.storage.datatypes.SqlStoreReference'>` | No | | SQL store configuration for file metadata |
## Sample Configuration
```yaml
api_key: ${env.OPENAI_API_KEY}
metadata_store:
table_name: openai_files_metadata
backend: sql_default
```
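
## Example Usage

A minimal sketch, assuming a Llama Stack server configured with this provider is reachable at `http://localhost:8321` and exposes the OpenAI-compatible `/v1/files` endpoint. Client usage follows the standard `openai` Python package; the file name, purpose, and base URL are placeholders, not values taken from this page:

```python
# Hypothetical usage of the Files API through a Llama Stack deployment that
# routes file storage to OpenAI via this provider.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

# Upload a local file; the provider persists it through OpenAI's file service.
with open("report.pdf", "rb") as f:
    uploaded = client.files.create(file=f, purpose="batch")

print(uploaded.id, uploaded.filename)
```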

View file

@ -20,6 +20,7 @@ NVIDIA inference provider for accessing NVIDIA NIM models and AI services.
| `url` | `<class 'str'>` | No | https://integrate.api.nvidia.com | A base url for accessing the NVIDIA NIM | | `url` | `<class 'str'>` | No | https://integrate.api.nvidia.com | A base url for accessing the NVIDIA NIM |
| `timeout` | `<class 'int'>` | No | 60 | Timeout for the HTTP requests | | `timeout` | `<class 'int'>` | No | 60 | Timeout for the HTTP requests |
| `append_api_version` | `<class 'bool'>` | No | True | When set to false, the API version will not be appended to the base_url. By default, it is true. | | `append_api_version` | `<class 'bool'>` | No | True | When set to false, the API version will not be appended to the base_url. By default, it is true. |
| `rerank_model_to_url` | `dict[str, str]` | No | `{'nv-rerank-qa-mistral-4b:1': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking', 'nvidia/nv-rerankqa-mistral-4b-v3': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking', 'nvidia/llama-3.2-nv-rerankqa-1b-v2': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking'}` | Mapping of rerank model identifiers to their API endpoints. |
## Sample Configuration ## Sample Configuration

View file

@ -72,14 +72,14 @@ description: |
Example with hybrid search: Example with hybrid search:
```python ```python
response = await vector_io.query_chunks( response = await vector_io.query_chunks(
vector_db_id="my_db", vector_store_id="my_db",
query="your query here", query="your query here",
params={"mode": "hybrid", "max_chunks": 3, "score_threshold": 0.7}, params={"mode": "hybrid", "max_chunks": 3, "score_threshold": 0.7},
) )
# Using RRF ranker # Using RRF ranker
response = await vector_io.query_chunks( response = await vector_io.query_chunks(
vector_db_id="my_db", vector_store_id="my_db",
query="your query here", query="your query here",
params={ params={
"mode": "hybrid", "mode": "hybrid",
@ -91,7 +91,7 @@ description: |
# Using weighted ranker # Using weighted ranker
response = await vector_io.query_chunks( response = await vector_io.query_chunks(
vector_db_id="my_db", vector_store_id="my_db",
query="your query here", query="your query here",
params={ params={
"mode": "hybrid", "mode": "hybrid",
@ -105,7 +105,7 @@ description: |
Example with explicit vector search: Example with explicit vector search:
```python ```python
response = await vector_io.query_chunks( response = await vector_io.query_chunks(
vector_db_id="my_db", vector_store_id="my_db",
query="your query here", query="your query here",
params={"mode": "vector", "max_chunks": 3, "score_threshold": 0.7}, params={"mode": "vector", "max_chunks": 3, "score_threshold": 0.7},
) )
@ -114,7 +114,7 @@ description: |
Example with keyword search: Example with keyword search:
```python ```python
response = await vector_io.query_chunks( response = await vector_io.query_chunks(
vector_db_id="my_db", vector_store_id="my_db",
query="your query here", query="your query here",
params={"mode": "keyword", "max_chunks": 3, "score_threshold": 0.7}, params={"mode": "keyword", "max_chunks": 3, "score_threshold": 0.7},
) )
@ -277,14 +277,14 @@ The SQLite-vec provider supports three search modes:
Example with hybrid search: Example with hybrid search:
```python ```python
response = await vector_io.query_chunks( response = await vector_io.query_chunks(
vector_db_id="my_db", vector_store_id="my_db",
query="your query here", query="your query here",
params={"mode": "hybrid", "max_chunks": 3, "score_threshold": 0.7}, params={"mode": "hybrid", "max_chunks": 3, "score_threshold": 0.7},
) )
# Using RRF ranker # Using RRF ranker
response = await vector_io.query_chunks( response = await vector_io.query_chunks(
vector_db_id="my_db", vector_store_id="my_db",
query="your query here", query="your query here",
params={ params={
"mode": "hybrid", "mode": "hybrid",
@ -296,7 +296,7 @@ response = await vector_io.query_chunks(
# Using weighted ranker # Using weighted ranker
response = await vector_io.query_chunks( response = await vector_io.query_chunks(
vector_db_id="my_db", vector_store_id="my_db",
query="your query here", query="your query here",
params={ params={
"mode": "hybrid", "mode": "hybrid",
@ -310,7 +310,7 @@ response = await vector_io.query_chunks(
Example with explicit vector search: Example with explicit vector search:
```python ```python
response = await vector_io.query_chunks( response = await vector_io.query_chunks(
vector_db_id="my_db", vector_store_id="my_db",
query="your query here", query="your query here",
params={"mode": "vector", "max_chunks": 3, "score_threshold": 0.7}, params={"mode": "vector", "max_chunks": 3, "score_threshold": 0.7},
) )
@ -319,7 +319,7 @@ response = await vector_io.query_chunks(
Example with keyword search: Example with keyword search:
```python ```python
response = await vector_io.query_chunks( response = await vector_io.query_chunks(
vector_db_id="my_db", vector_store_id="my_db",
query="your query here", query="your query here",
params={"mode": "keyword", "max_chunks": 3, "score_threshold": 0.7}, params={"mode": "keyword", "max_chunks": 3, "score_threshold": 0.7},
) )

File diff suppressed because it is too large Load diff

View file

@ -242,15 +242,6 @@ const sidebars: SidebarsConfig = {
'providers/eval/remote_nvidia' 'providers/eval/remote_nvidia'
], ],
}, },
{
type: 'category',
label: 'Telemetry',
collapsed: true,
items: [
'providers/telemetry/index',
'providers/telemetry/inline_meta-reference'
],
},
{ {
type: 'category', type: 'category',
label: 'Batches', label: 'Batches',

View file

@ -1414,6 +1414,193 @@
"deprecated": true "deprecated": true
} }
}, },
"/v1/openai/v1/batches": {
"get": {
"responses": {
"200": {
"description": "A list of batch objects.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ListBatchesResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "List all batches for the current user.",
"description": "List all batches for the current user.",
"parameters": [
{
"name": "after",
"in": "query",
"description": "A cursor for pagination; returns batches after this batch ID.",
"required": false,
"schema": {
"type": "string"
}
},
{
"name": "limit",
"in": "query",
"description": "Number of batches to return (default 20, max 100).",
"required": true,
"schema": {
"type": "integer"
}
}
],
"deprecated": true
},
"post": {
"responses": {
"200": {
"description": "The created batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Create a new batch for processing multiple API requests.",
"description": "Create a new batch for processing multiple API requests.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CreateBatchRequest"
}
}
},
"required": true
},
"deprecated": true
}
},
"/v1/openai/v1/batches/{batch_id}": {
"get": {
"responses": {
"200": {
"description": "The batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Retrieve information about a specific batch.",
"description": "Retrieve information about a specific batch.",
"parameters": [
{
"name": "batch_id",
"in": "path",
"description": "The ID of the batch to retrieve.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": true
}
},
"/v1/openai/v1/batches/{batch_id}/cancel": {
"post": {
"responses": {
"200": {
"description": "The updated batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Cancel a batch that is in progress.",
"description": "Cancel a batch that is in progress.",
"parameters": [
{
"name": "batch_id",
"in": "path",
"description": "The ID of the batch to cancel.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": true
}
},
"/v1/openai/v1/chat/completions": { "/v1/openai/v1/chat/completions": {
"get": { "get": {
"responses": { "responses": {
@ -3901,7 +4088,6 @@
}, },
"max_tokens": { "max_tokens": {
"type": "integer", "type": "integer",
"default": 0,
"description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length." "description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
}, },
"repetition_penalty": { "repetition_penalty": {
@ -4391,7 +4577,7 @@
"const": "memory_retrieval", "const": "memory_retrieval",
"default": "memory_retrieval" "default": "memory_retrieval"
}, },
"vector_db_ids": { "vector_store_ids": {
"type": "string", "type": "string",
"description": "The IDs of the vector databases to retrieve context from." "description": "The IDs of the vector databases to retrieve context from."
}, },
@ -4405,7 +4591,7 @@
"turn_id", "turn_id",
"step_id", "step_id",
"step_type", "step_type",
"vector_db_ids", "vector_store_ids",
"inserted_context" "inserted_context"
], ],
"title": "MemoryRetrievalStep", "title": "MemoryRetrievalStep",
@ -6402,6 +6588,451 @@
"title": "Job", "title": "Job",
"description": "A job execution instance with status tracking." "description": "A job execution instance with status tracking."
}, },
"ListBatchesResponse": {
"type": "object",
"properties": {
"object": {
"type": "string",
"const": "list",
"default": "list"
},
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"completion_window": {
"type": "string"
},
"created_at": {
"type": "integer"
},
"endpoint": {
"type": "string"
},
"input_file_id": {
"type": "string"
},
"object": {
"type": "string",
"const": "batch"
},
"status": {
"type": "string",
"enum": [
"validating",
"failed",
"in_progress",
"finalizing",
"completed",
"expired",
"cancelling",
"cancelled"
]
},
"cancelled_at": {
"type": "integer"
},
"cancelling_at": {
"type": "integer"
},
"completed_at": {
"type": "integer"
},
"error_file_id": {
"type": "string"
},
"errors": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string"
},
"line": {
"type": "integer"
},
"message": {
"type": "string"
},
"param": {
"type": "string"
}
},
"additionalProperties": false,
"title": "BatchError"
}
},
"object": {
"type": "string"
}
},
"additionalProperties": false,
"title": "Errors"
},
"expired_at": {
"type": "integer"
},
"expires_at": {
"type": "integer"
},
"failed_at": {
"type": "integer"
},
"finalizing_at": {
"type": "integer"
},
"in_progress_at": {
"type": "integer"
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"model": {
"type": "string"
},
"output_file_id": {
"type": "string"
},
"request_counts": {
"type": "object",
"properties": {
"completed": {
"type": "integer"
},
"failed": {
"type": "integer"
},
"total": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"completed",
"failed",
"total"
],
"title": "BatchRequestCounts"
},
"usage": {
"type": "object",
"properties": {
"input_tokens": {
"type": "integer"
},
"input_tokens_details": {
"type": "object",
"properties": {
"cached_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"cached_tokens"
],
"title": "InputTokensDetails"
},
"output_tokens": {
"type": "integer"
},
"output_tokens_details": {
"type": "object",
"properties": {
"reasoning_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"reasoning_tokens"
],
"title": "OutputTokensDetails"
},
"total_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"input_tokens",
"input_tokens_details",
"output_tokens",
"output_tokens_details",
"total_tokens"
],
"title": "BatchUsage"
}
},
"additionalProperties": false,
"required": [
"id",
"completion_window",
"created_at",
"endpoint",
"input_file_id",
"object",
"status"
],
"title": "Batch"
}
},
"first_id": {
"type": "string"
},
"last_id": {
"type": "string"
},
"has_more": {
"type": "boolean",
"default": false
}
},
"additionalProperties": false,
"required": [
"object",
"data",
"has_more"
],
"title": "ListBatchesResponse",
"description": "Response containing a list of batch objects."
},
"CreateBatchRequest": {
"type": "object",
"properties": {
"input_file_id": {
"type": "string",
"description": "The ID of an uploaded file containing requests for the batch."
},
"endpoint": {
"type": "string",
"description": "The endpoint to be used for all requests in the batch."
},
"completion_window": {
"type": "string",
"const": "24h",
"description": "The time window within which the batch should be processed."
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
},
"description": "Optional metadata for the batch."
},
"idempotency_key": {
"type": "string",
"description": "Optional idempotency key. When provided, enables idempotent behavior."
}
},
"additionalProperties": false,
"required": [
"input_file_id",
"endpoint",
"completion_window"
],
"title": "CreateBatchRequest"
},
"Batch": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"completion_window": {
"type": "string"
},
"created_at": {
"type": "integer"
},
"endpoint": {
"type": "string"
},
"input_file_id": {
"type": "string"
},
"object": {
"type": "string",
"const": "batch"
},
"status": {
"type": "string",
"enum": [
"validating",
"failed",
"in_progress",
"finalizing",
"completed",
"expired",
"cancelling",
"cancelled"
]
},
"cancelled_at": {
"type": "integer"
},
"cancelling_at": {
"type": "integer"
},
"completed_at": {
"type": "integer"
},
"error_file_id": {
"type": "string"
},
"errors": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string"
},
"line": {
"type": "integer"
},
"message": {
"type": "string"
},
"param": {
"type": "string"
}
},
"additionalProperties": false,
"title": "BatchError"
}
},
"object": {
"type": "string"
}
},
"additionalProperties": false,
"title": "Errors"
},
"expired_at": {
"type": "integer"
},
"expires_at": {
"type": "integer"
},
"failed_at": {
"type": "integer"
},
"finalizing_at": {
"type": "integer"
},
"in_progress_at": {
"type": "integer"
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"model": {
"type": "string"
},
"output_file_id": {
"type": "string"
},
"request_counts": {
"type": "object",
"properties": {
"completed": {
"type": "integer"
},
"failed": {
"type": "integer"
},
"total": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"completed",
"failed",
"total"
],
"title": "BatchRequestCounts"
},
"usage": {
"type": "object",
"properties": {
"input_tokens": {
"type": "integer"
},
"input_tokens_details": {
"type": "object",
"properties": {
"cached_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"cached_tokens"
],
"title": "InputTokensDetails"
},
"output_tokens": {
"type": "integer"
},
"output_tokens_details": {
"type": "object",
"properties": {
"reasoning_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"reasoning_tokens"
],
"title": "OutputTokensDetails"
},
"total_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"input_tokens",
"input_tokens_details",
"output_tokens",
"output_tokens_details",
"total_tokens"
],
"title": "BatchUsage"
}
},
"additionalProperties": false,
"required": [
"id",
"completion_window",
"created_at",
"endpoint",
"input_file_id",
"object",
"status"
],
"title": "Batch"
},
"Order": { "Order": {
"type": "string", "type": "string",
"enum": [ "enum": [
@ -8527,29 +9158,14 @@
"OpenAIResponseInput": { "OpenAIResponseInput": {
"oneOf": [ "oneOf": [
{ {
"$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall" "$ref": "#/components/schemas/OpenAIResponseOutput"
},
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall"
},
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall"
}, },
{ {
"$ref": "#/components/schemas/OpenAIResponseInputFunctionToolCallOutput" "$ref": "#/components/schemas/OpenAIResponseInputFunctionToolCallOutput"
}, },
{
"$ref": "#/components/schemas/OpenAIResponseMCPApprovalRequest"
},
{ {
"$ref": "#/components/schemas/OpenAIResponseMCPApprovalResponse" "$ref": "#/components/schemas/OpenAIResponseMCPApprovalResponse"
}, },
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPCall"
},
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools"
},
{ {
"$ref": "#/components/schemas/OpenAIResponseMessage" "$ref": "#/components/schemas/OpenAIResponseMessage"
} }
@ -8592,16 +9208,53 @@
}, },
{ {
"$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage" "$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage"
},
{
"$ref": "#/components/schemas/OpenAIResponseInputMessageContentFile"
} }
], ],
"discriminator": { "discriminator": {
"propertyName": "type", "propertyName": "type",
"mapping": { "mapping": {
"input_text": "#/components/schemas/OpenAIResponseInputMessageContentText", "input_text": "#/components/schemas/OpenAIResponseInputMessageContentText",
"input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage" "input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage",
"input_file": "#/components/schemas/OpenAIResponseInputMessageContentFile"
} }
} }
}, },
"OpenAIResponseInputMessageContentFile": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "input_file",
"default": "input_file",
"description": "The type of the input item. Always `input_file`."
},
"file_data": {
"type": "string",
"description": "The data of the file to be sent to the model."
},
"file_id": {
"type": "string",
"description": "(Optional) The ID of the file to be sent to the model."
},
"file_url": {
"type": "string",
"description": "The URL of the file to be sent to the model."
},
"filename": {
"type": "string",
"description": "The name of the file to be sent to the model."
}
},
"additionalProperties": false,
"required": [
"type"
],
"title": "OpenAIResponseInputMessageContentFile",
"description": "File content for input messages in OpenAI response format."
},
"OpenAIResponseInputMessageContentImage": { "OpenAIResponseInputMessageContentImage": {
"type": "object", "type": "object",
"properties": { "properties": {
@ -8629,6 +9282,10 @@
"default": "input_image", "default": "input_image",
"description": "Content type identifier, always \"input_image\"" "description": "Content type identifier, always \"input_image\""
}, },
"file_id": {
"type": "string",
"description": "(Optional) The ID of the file to be sent to the model."
},
"image_url": { "image_url": {
"type": "string", "type": "string",
"description": "(Optional) URL of the image content" "description": "(Optional) URL of the image content"
@ -8992,6 +9649,10 @@
"type": "string", "type": "string",
"description": "(Optional) ID of the previous response in a conversation" "description": "(Optional) ID of the previous response in a conversation"
}, },
"prompt": {
"$ref": "#/components/schemas/OpenAIResponsePrompt",
"description": "(Optional) Reference to a prompt template and its variables."
},
"status": { "status": {
"type": "string", "type": "string",
"description": "Current status of the response generation" "description": "Current status of the response generation"
@ -9416,6 +10077,32 @@
"title": "OpenAIResponseOutputMessageWebSearchToolCall", "title": "OpenAIResponseOutputMessageWebSearchToolCall",
"description": "Web search tool call output message for OpenAI responses." "description": "Web search tool call output message for OpenAI responses."
}, },
"OpenAIResponsePrompt": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "Unique identifier of the prompt template"
},
"variables": {
"type": "object",
"additionalProperties": {
"$ref": "#/components/schemas/OpenAIResponseInputMessageContent"
},
"description": "Dictionary of variable names to OpenAIResponseInputMessageContent structure for template substitution. The substitution values can either be strings, or other Response input types like images or files."
},
"version": {
"type": "string",
"description": "Version number of the prompt to use (defaults to latest if not specified)"
}
},
"additionalProperties": false,
"required": [
"id"
],
"title": "OpenAIResponsePrompt",
"description": "OpenAI compatible Prompt object that is used in OpenAI responses."
},
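A sketch of referencing a stored prompt template from a Responses call. The prompt id, version, and variable names are invented placeholders, and the client must be recent enough to expose the prompt parameter.

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")  # placeholders

response = client.responses.create(
    model="llama3.2:3b",  # placeholder model id
    prompt={
        "id": "pmpt_customer_support",  # unique identifier of the prompt template (placeholder)
        "version": "2",                  # defaults to the latest version when omitted
        "variables": {
            # values may be plain strings or other input content parts (images, files)
            "customer_name": "Ada",
            "policy_doc": {"type": "input_file", "file_id": "file-abc123"},
        },
    },
)
print(response.output_text)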
"OpenAIResponseText": { "OpenAIResponseText": {
"type": "object", "type": "object",
"properties": { "properties": {
@ -9786,6 +10473,10 @@
"type": "string", "type": "string",
"description": "The underlying LLM used for completions." "description": "The underlying LLM used for completions."
}, },
"prompt": {
"$ref": "#/components/schemas/OpenAIResponsePrompt",
"description": "(Optional) Prompt object with ID, version, and variables."
},
"instructions": { "instructions": {
"type": "string" "type": "string"
}, },
@ -9874,6 +10565,10 @@
"type": "string", "type": "string",
"description": "(Optional) ID of the previous response in a conversation" "description": "(Optional) ID of the previous response in a conversation"
}, },
"prompt": {
"$ref": "#/components/schemas/OpenAIResponsePrompt",
"description": "(Optional) Reference to a prompt template and its variables."
},
"status": { "status": {
"type": "string", "type": "string",
"description": "Current status of the response generation" "description": "Current status of the response generation"
@ -13442,6 +14137,11 @@
"description": "APIs for creating and interacting with agentic systems.\n\n## Deprecated APIs\n\n> **⚠️ DEPRECATED**: These APIs are provided for migration reference and will be removed in future versions. Not recommended for new projects.\n\n### Migration Guidance\n\nIf you are using deprecated versions of the Agents or Responses APIs, please migrate to:\n\n- **Responses API**: Use the stable v1 Responses API endpoints\n", "description": "APIs for creating and interacting with agentic systems.\n\n## Deprecated APIs\n\n> **⚠️ DEPRECATED**: These APIs are provided for migration reference and will be removed in future versions. Not recommended for new projects.\n\n### Migration Guidance\n\nIf you are using deprecated versions of the Agents or Responses APIs, please migrate to:\n\n- **Responses API**: Use the stable v1 Responses API endpoints\n",
"x-displayName": "Agents" "x-displayName": "Agents"
}, },
{
"name": "Batches",
"description": "The API is designed to allow use of openai client libraries for seamless integration.\n\nThis API provides the following extensions:\n - idempotent batch creation\n\nNote: This API is currently under active development and may undergo changes.",
"x-displayName": "The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale."
},
    {
      "name": "Benchmarks",
      "description": ""
@ -13492,6 +14192,7 @@
      "name": "Operations",
      "tags": [
        "Agents",
        "Batches",
        "Benchmarks",
        "DatasetIO",
        "Datasets",


@ -1012,6 +1012,141 @@ paths:
          schema:
            type: string
      deprecated: true
/v1/openai/v1/batches:
get:
responses:
'200':
description: A list of batch objects.
content:
application/json:
schema:
$ref: '#/components/schemas/ListBatchesResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: List all batches for the current user.
description: List all batches for the current user.
parameters:
- name: after
in: query
description: >-
A cursor for pagination; returns batches after this batch ID.
required: false
schema:
type: string
- name: limit
in: query
description: >-
Number of batches to return (default 20, max 100).
required: true
schema:
type: integer
deprecated: true
post:
responses:
'200':
description: The created batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Create a new batch for processing multiple API requests.
description: >-
Create a new batch for processing multiple API requests.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CreateBatchRequest'
required: true
deprecated: true
/v1/openai/v1/batches/{batch_id}:
get:
responses:
'200':
description: The batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Retrieve information about a specific batch.
description: >-
Retrieve information about a specific batch.
parameters:
- name: batch_id
in: path
description: The ID of the batch to retrieve.
required: true
schema:
type: string
deprecated: true
/v1/openai/v1/batches/{batch_id}/cancel:
post:
responses:
'200':
description: The updated batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: Cancel a batch that is in progress.
description: Cancel a batch that is in progress.
parameters:
- name: batch_id
in: path
description: The ID of the batch to cancel.
required: true
schema:
type: string
deprecated: true
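These four operations map directly onto the stock openai Python client, which the Batches description above explicitly targets. A rough sketch, assuming a Llama Stack server reachable at the placeholder base URL:

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")  # placeholders

batch = client.batches.create(
    input_file_id="file-abc123",        # an uploaded JSONL file of requests (placeholder id)
    endpoint="/v1/chat/completions",    # endpoint used for every request in the batch
    completion_window="24h",            # only "24h" is accepted per the schema below
)
print(client.batches.retrieve(batch.id).status)      # e.g. "validating" or "in_progress"
print([b.id for b in client.batches.list(limit=20)]) # most recent batches first
client.batches.cancel(batch.id)                      # moves the batch toward "cancelling"/"cancelled"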
  /v1/openai/v1/chat/completions:
    get:
      responses:
@ -2862,7 +2997,6 @@ components:
description: The sampling strategy. description: The sampling strategy.
max_tokens: max_tokens:
type: integer type: integer
default: 0
description: >- description: >-
The maximum number of tokens that can be generated in the completion. The maximum number of tokens that can be generated in the completion.
The token count of your prompt plus max_tokens cannot exceed the model's The token count of your prompt plus max_tokens cannot exceed the model's
@ -3253,7 +3387,7 @@ components:
description: Type of the step in an agent turn. description: Type of the step in an agent turn.
const: memory_retrieval const: memory_retrieval
default: memory_retrieval default: memory_retrieval
vector_db_ids: vector_store_ids:
type: string type: string
description: >- description: >-
The IDs of the vector databases to retrieve context from. The IDs of the vector databases to retrieve context from.
@ -3266,7 +3400,7 @@ components:
- turn_id - turn_id
- step_id - step_id
- step_type - step_type
- vector_db_ids - vector_store_ids
- inserted_context - inserted_context
title: MemoryRetrievalStep title: MemoryRetrievalStep
description: >- description: >-
@ -4737,6 +4871,331 @@ components:
title: Job title: Job
description: >- description: >-
A job execution instance with status tracking. A job execution instance with status tracking.
ListBatchesResponse:
type: object
properties:
object:
type: string
const: list
default: list
data:
type: array
items:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
first_id:
type: string
last_id:
type: string
has_more:
type: boolean
default: false
additionalProperties: false
required:
- object
- data
- has_more
title: ListBatchesResponse
description: >-
Response containing a list of batch objects.
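first_id, last_id, and has_more support cursor pagination together with the after query parameter; a sketch of walking every page (placeholder base URL and key):

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")  # placeholders

after = None
while True:
    page = client.batches.list(limit=100, **({"after": after} if after else {}))
    for batch in page.data:
        print(batch.id, batch.status)
    if not page.has_more:
        break
    after = page.data[-1].id  # the cursor is the last batch id on the page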
CreateBatchRequest:
type: object
properties:
input_file_id:
type: string
description: >-
The ID of an uploaded file containing requests for the batch.
endpoint:
type: string
description: >-
The endpoint to be used for all requests in the batch.
completion_window:
type: string
const: 24h
description: >-
The time window within which the batch should be processed.
metadata:
type: object
additionalProperties:
type: string
description: Optional metadata for the batch.
idempotency_key:
type: string
description: >-
Optional idempotency key. When provided, enables idempotent behavior.
additionalProperties: false
required:
- input_file_id
- endpoint
- completion_window
title: CreateBatchRequest
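A sketch of preparing the uploaded input file and submitting a CreateBatchRequest through the openai client. The JSONL line format (custom_id/method/url/body) follows the OpenAI batch convention and is an assumption here; the model id is a placeholder, and idempotency_key is passed via extra_body because it is a Llama Stack extension rather than a standard client argument.

import json
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")  # placeholders

# One request per line; the line layout follows the OpenAI batch convention (assumption).
with open("batch_input.jsonl", "w") as f:
    f.write(json.dumps({
        "custom_id": "req-1",
        "method": "POST",
        "url": "/v1/chat/completions",
        "body": {"model": "llama3.2:3b", "messages": [{"role": "user", "content": "Hello"}]},
    }) + "\n")

input_file = client.files.create(file=open("batch_input.jsonl", "rb"), purpose="batch")

batch = client.batches.create(
    input_file_id=input_file.id,
    endpoint="/v1/chat/completions",
    completion_window="24h",
    metadata={"project": "nightly-eval"},                        # optional, string values only
    extra_body={"idempotency_key": "nightly-eval-2025-10-31"},   # Llama Stack extension
)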
Batch:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
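The status enum above defines the batch lifecycle; a sketch of polling until a terminal state and then reading the output and error files (placeholder id and base URL):

import time
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")  # placeholders

terminal = {"completed", "failed", "expired", "cancelled"}
batch = client.batches.retrieve("batch_123")  # placeholder id
while batch.status not in terminal:
    time.sleep(30)
    batch = client.batches.retrieve(batch.id)

if batch.output_file_id:
    print(client.files.content(batch.output_file_id).text)  # one JSON result per line
if batch.error_file_id:
    print(client.files.content(batch.error_file_id).text)   # per-request errors
print(batch.request_counts)                                  # completed / failed / total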
    Order:
      type: string
      enum:
@ -6370,14 +6829,9 @@ components:
Error details for failed OpenAI response requests. Error details for failed OpenAI response requests.
OpenAIResponseInput: OpenAIResponseInput:
oneOf: oneOf:
- $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' - $ref: '#/components/schemas/OpenAIResponseOutput'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
- $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
- $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
- $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
- $ref: '#/components/schemas/OpenAIResponseMessage' - $ref: '#/components/schemas/OpenAIResponseMessage'
"OpenAIResponseInputFunctionToolCallOutput": "OpenAIResponseInputFunctionToolCallOutput":
type: object type: object
@ -6408,11 +6862,44 @@ components:
      oneOf:
        - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
        - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage'
        - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile'
      discriminator:
        propertyName: type
        mapping:
          input_text: '#/components/schemas/OpenAIResponseInputMessageContentText'
          input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage'
input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile'
OpenAIResponseInputMessageContentFile:
type: object
properties:
type:
type: string
const: input_file
default: input_file
description: >-
The type of the input item. Always `input_file`.
file_data:
type: string
description: >-
The data of the file to be sent to the model.
file_id:
type: string
description: >-
(Optional) The ID of the file to be sent to the model.
file_url:
type: string
description: >-
The URL of the file to be sent to the model.
filename:
type: string
description: >-
The name of the file to be sent to the model.
additionalProperties: false
required:
- type
title: OpenAIResponseInputMessageContentFile
description: >-
File content for input messages in OpenAI response format.
    OpenAIResponseInputMessageContentImage:
      type: object
      properties:
@ -6433,6 +6920,10 @@
          default: input_image
          description: >-
            Content type identifier, always "input_image"
        file_id:
          type: string
          description: >-
            (Optional) The ID of the file to be sent to the model.
        image_url:
          type: string
          description: (Optional) URL of the image content
@ -6703,6 +7194,10 @@
          type: string
          description: >-
            (Optional) ID of the previous response in a conversation
        prompt:
          $ref: '#/components/schemas/OpenAIResponsePrompt'
          description: >-
            (Optional) Reference to a prompt template and its variables.
        status:
          type: string
          description: >-
@ -7042,6 +7537,30 @@
        OpenAIResponseOutputMessageWebSearchToolCall
      description: >-
        Web search tool call output message for OpenAI responses.
OpenAIResponsePrompt:
type: object
properties:
id:
type: string
description: Unique identifier of the prompt template
variables:
type: object
additionalProperties:
$ref: '#/components/schemas/OpenAIResponseInputMessageContent'
description: >-
Dictionary of variable names to OpenAIResponseInputMessageContent structure
for template substitution. The substitution values can either be strings,
or other Response input types like images or files.
version:
type: string
description: >-
Version number of the prompt to use (defaults to latest if not specified)
additionalProperties: false
required:
- id
title: OpenAIResponsePrompt
description: >-
OpenAI compatible Prompt object that is used in OpenAI responses.
    OpenAIResponseText:
      type: object
      properties:
@ -7299,6 +7818,10 @@
        model:
          type: string
          description: The underlying LLM used for completions.
prompt:
$ref: '#/components/schemas/OpenAIResponsePrompt'
description: >-
(Optional) Prompt object with ID, version, and variables.
        instructions:
          type: string
        previous_response_id:
@ -7376,6 +7899,10 @@
          type: string
          description: >-
            (Optional) ID of the previous response in a conversation
prompt:
$ref: '#/components/schemas/OpenAIResponsePrompt'
description: >-
(Optional) Reference to a prompt template and its variables.
        status:
          type: string
          description: >-
@ -10196,6 +10723,19 @@ tags:
      - **Responses API**: Use the stable v1 Responses API endpoints
    x-displayName: Agents
- name: Batches
description: >-
The API is designed to allow use of openai client libraries for seamless integration.
This API provides the following extensions:
- idempotent batch creation
Note: This API is currently under active development and may undergo changes.
x-displayName: >-
The Batches API enables efficient processing of multiple requests in a single
operation, particularly useful for processing large datasets, batch evaluation
workflows, and cost-effective inference at scale.
  - name: Benchmarks
    description: ''
  - name: DatasetIO
@ -10241,6 +10781,7 @@ x-tagGroups:
  - name: Operations
    tags:
      - Agents
- Batches
      - Benchmarks
      - DatasetIO
      - Datasets


@ -2376,7 +2376,6 @@
}, },
"max_tokens": { "max_tokens": {
"type": "integer", "type": "integer",
"default": 0,
"description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length." "description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
}, },
"repetition_penalty": { "repetition_penalty": {
@ -2866,7 +2865,7 @@
"const": "memory_retrieval", "const": "memory_retrieval",
"default": "memory_retrieval" "default": "memory_retrieval"
}, },
"vector_db_ids": { "vector_store_ids": {
"type": "string", "type": "string",
"description": "The IDs of the vector databases to retrieve context from." "description": "The IDs of the vector databases to retrieve context from."
}, },
@ -2880,7 +2879,7 @@
"turn_id", "turn_id",
"step_id", "step_id",
"step_type", "step_type",
"vector_db_ids", "vector_store_ids",
"inserted_context" "inserted_context"
], ],
"title": "MemoryRetrievalStep", "title": "MemoryRetrievalStep",


@ -1695,7 +1695,6 @@ components:
description: The sampling strategy. description: The sampling strategy.
max_tokens: max_tokens:
type: integer type: integer
default: 0
description: >- description: >-
The maximum number of tokens that can be generated in the completion. The maximum number of tokens that can be generated in the completion.
The token count of your prompt plus max_tokens cannot exceed the model's The token count of your prompt plus max_tokens cannot exceed the model's
@ -2086,7 +2085,7 @@ components:
description: Type of the step in an agent turn. description: Type of the step in an agent turn.
const: memory_retrieval const: memory_retrieval
default: memory_retrieval default: memory_retrieval
vector_db_ids: vector_store_ids:
type: string type: string
description: >- description: >-
The IDs of the vector databases to retrieve context from. The IDs of the vector databases to retrieve context from.
@ -2099,7 +2098,7 @@ components:
- turn_id - turn_id
- step_id - step_id
- step_type - step_type
- vector_db_ids - vector_store_ids
- inserted_context - inserted_context
title: MemoryRetrievalStep title: MemoryRetrievalStep
description: >- description: >-


@ -40,6 +40,193 @@
    }
  ],
  "paths": {
"/v1/batches": {
"get": {
"responses": {
"200": {
"description": "A list of batch objects.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ListBatchesResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "List all batches for the current user.",
"description": "List all batches for the current user.",
"parameters": [
{
"name": "after",
"in": "query",
"description": "A cursor for pagination; returns batches after this batch ID.",
"required": false,
"schema": {
"type": "string"
}
},
{
"name": "limit",
"in": "query",
"description": "Number of batches to return (default 20, max 100).",
"required": true,
"schema": {
"type": "integer"
}
}
],
"deprecated": false
},
"post": {
"responses": {
"200": {
"description": "The created batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Create a new batch for processing multiple API requests.",
"description": "Create a new batch for processing multiple API requests.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CreateBatchRequest"
}
}
},
"required": true
},
"deprecated": false
}
},
"/v1/batches/{batch_id}": {
"get": {
"responses": {
"200": {
"description": "The batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Retrieve information about a specific batch.",
"description": "Retrieve information about a specific batch.",
"parameters": [
{
"name": "batch_id",
"in": "path",
"description": "The ID of the batch to retrieve.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": false
}
},
"/v1/batches/{batch_id}/cancel": {
"post": {
"responses": {
"200": {
"description": "The updated batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Cancel a batch that is in progress.",
"description": "Cancel a batch that is in progress.",
"parameters": [
{
"name": "batch_id",
"in": "path",
"description": "The ID of the batch to cancel.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": false
}
},
"/v1/chat/completions": { "/v1/chat/completions": {
"get": { "get": {
"responses": { "responses": {
@ -4005,6 +4192,451 @@
"title": "Error", "title": "Error",
"description": "Error response from the API. Roughly follows RFC 7807." "description": "Error response from the API. Roughly follows RFC 7807."
}, },
"ListBatchesResponse": {
"type": "object",
"properties": {
"object": {
"type": "string",
"const": "list",
"default": "list"
},
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"completion_window": {
"type": "string"
},
"created_at": {
"type": "integer"
},
"endpoint": {
"type": "string"
},
"input_file_id": {
"type": "string"
},
"object": {
"type": "string",
"const": "batch"
},
"status": {
"type": "string",
"enum": [
"validating",
"failed",
"in_progress",
"finalizing",
"completed",
"expired",
"cancelling",
"cancelled"
]
},
"cancelled_at": {
"type": "integer"
},
"cancelling_at": {
"type": "integer"
},
"completed_at": {
"type": "integer"
},
"error_file_id": {
"type": "string"
},
"errors": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string"
},
"line": {
"type": "integer"
},
"message": {
"type": "string"
},
"param": {
"type": "string"
}
},
"additionalProperties": false,
"title": "BatchError"
}
},
"object": {
"type": "string"
}
},
"additionalProperties": false,
"title": "Errors"
},
"expired_at": {
"type": "integer"
},
"expires_at": {
"type": "integer"
},
"failed_at": {
"type": "integer"
},
"finalizing_at": {
"type": "integer"
},
"in_progress_at": {
"type": "integer"
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"model": {
"type": "string"
},
"output_file_id": {
"type": "string"
},
"request_counts": {
"type": "object",
"properties": {
"completed": {
"type": "integer"
},
"failed": {
"type": "integer"
},
"total": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"completed",
"failed",
"total"
],
"title": "BatchRequestCounts"
},
"usage": {
"type": "object",
"properties": {
"input_tokens": {
"type": "integer"
},
"input_tokens_details": {
"type": "object",
"properties": {
"cached_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"cached_tokens"
],
"title": "InputTokensDetails"
},
"output_tokens": {
"type": "integer"
},
"output_tokens_details": {
"type": "object",
"properties": {
"reasoning_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"reasoning_tokens"
],
"title": "OutputTokensDetails"
},
"total_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"input_tokens",
"input_tokens_details",
"output_tokens",
"output_tokens_details",
"total_tokens"
],
"title": "BatchUsage"
}
},
"additionalProperties": false,
"required": [
"id",
"completion_window",
"created_at",
"endpoint",
"input_file_id",
"object",
"status"
],
"title": "Batch"
}
},
"first_id": {
"type": "string"
},
"last_id": {
"type": "string"
},
"has_more": {
"type": "boolean",
"default": false
}
},
"additionalProperties": false,
"required": [
"object",
"data",
"has_more"
],
"title": "ListBatchesResponse",
"description": "Response containing a list of batch objects."
},
"CreateBatchRequest": {
"type": "object",
"properties": {
"input_file_id": {
"type": "string",
"description": "The ID of an uploaded file containing requests for the batch."
},
"endpoint": {
"type": "string",
"description": "The endpoint to be used for all requests in the batch."
},
"completion_window": {
"type": "string",
"const": "24h",
"description": "The time window within which the batch should be processed."
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
},
"description": "Optional metadata for the batch."
},
"idempotency_key": {
"type": "string",
"description": "Optional idempotency key. When provided, enables idempotent behavior."
}
},
"additionalProperties": false,
"required": [
"input_file_id",
"endpoint",
"completion_window"
],
"title": "CreateBatchRequest"
},
"Batch": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"completion_window": {
"type": "string"
},
"created_at": {
"type": "integer"
},
"endpoint": {
"type": "string"
},
"input_file_id": {
"type": "string"
},
"object": {
"type": "string",
"const": "batch"
},
"status": {
"type": "string",
"enum": [
"validating",
"failed",
"in_progress",
"finalizing",
"completed",
"expired",
"cancelling",
"cancelled"
]
},
"cancelled_at": {
"type": "integer"
},
"cancelling_at": {
"type": "integer"
},
"completed_at": {
"type": "integer"
},
"error_file_id": {
"type": "string"
},
"errors": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string"
},
"line": {
"type": "integer"
},
"message": {
"type": "string"
},
"param": {
"type": "string"
}
},
"additionalProperties": false,
"title": "BatchError"
}
},
"object": {
"type": "string"
}
},
"additionalProperties": false,
"title": "Errors"
},
"expired_at": {
"type": "integer"
},
"expires_at": {
"type": "integer"
},
"failed_at": {
"type": "integer"
},
"finalizing_at": {
"type": "integer"
},
"in_progress_at": {
"type": "integer"
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"model": {
"type": "string"
},
"output_file_id": {
"type": "string"
},
"request_counts": {
"type": "object",
"properties": {
"completed": {
"type": "integer"
},
"failed": {
"type": "integer"
},
"total": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"completed",
"failed",
"total"
],
"title": "BatchRequestCounts"
},
"usage": {
"type": "object",
"properties": {
"input_tokens": {
"type": "integer"
},
"input_tokens_details": {
"type": "object",
"properties": {
"cached_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"cached_tokens"
],
"title": "InputTokensDetails"
},
"output_tokens": {
"type": "integer"
},
"output_tokens_details": {
"type": "object",
"properties": {
"reasoning_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"reasoning_tokens"
],
"title": "OutputTokensDetails"
},
"total_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"input_tokens",
"input_tokens_details",
"output_tokens",
"output_tokens_details",
"total_tokens"
],
"title": "BatchUsage"
}
},
"additionalProperties": false,
"required": [
"id",
"completion_window",
"created_at",
"endpoint",
"input_file_id",
"object",
"status"
],
"title": "Batch"
},
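Besides error_file_id, validation problems surface on the Batch object itself through errors.data (the BatchError entries above); a sketch of printing them after a failed batch, with placeholder id and base URL:

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")  # placeholders

batch = client.batches.retrieve("batch_123")  # placeholder id
if batch.status == "failed" and batch.errors and batch.errors.data:
    for err in batch.errors.data:
        # err.line points at the offending line in the input JSONL file
        print(f"line {err.line}: [{err.code}] {err.message} (param={err.param})")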
"Order": { "Order": {
"type": "string", "type": "string",
"enum": [ "enum": [
@ -5696,16 +6328,53 @@
}, },
{ {
"$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage" "$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage"
},
{
"$ref": "#/components/schemas/OpenAIResponseInputMessageContentFile"
} }
], ],
"discriminator": { "discriminator": {
"propertyName": "type", "propertyName": "type",
"mapping": { "mapping": {
"input_text": "#/components/schemas/OpenAIResponseInputMessageContentText", "input_text": "#/components/schemas/OpenAIResponseInputMessageContentText",
"input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage" "input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage",
"input_file": "#/components/schemas/OpenAIResponseInputMessageContentFile"
} }
} }
}, },
"OpenAIResponseInputMessageContentFile": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "input_file",
"default": "input_file",
"description": "The type of the input item. Always `input_file`."
},
"file_data": {
"type": "string",
"description": "The data of the file to be sent to the model."
},
"file_id": {
"type": "string",
"description": "(Optional) The ID of the file to be sent to the model."
},
"file_url": {
"type": "string",
"description": "The URL of the file to be sent to the model."
},
"filename": {
"type": "string",
"description": "The name of the file to be sent to the model."
}
},
"additionalProperties": false,
"required": [
"type"
],
"title": "OpenAIResponseInputMessageContentFile",
"description": "File content for input messages in OpenAI response format."
},
"OpenAIResponseInputMessageContentImage": { "OpenAIResponseInputMessageContentImage": {
"type": "object", "type": "object",
"properties": { "properties": {
@ -5733,6 +6402,10 @@
"default": "input_image", "default": "input_image",
"description": "Content type identifier, always \"input_image\"" "description": "Content type identifier, always \"input_image\""
}, },
"file_id": {
"type": "string",
"description": "(Optional) The ID of the file to be sent to the model."
},
"image_url": { "image_url": {
"type": "string", "type": "string",
"description": "(Optional) URL of the image content" "description": "(Optional) URL of the image content"
@ -7305,29 +7978,14 @@
"OpenAIResponseInput": { "OpenAIResponseInput": {
"oneOf": [ "oneOf": [
{ {
"$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall" "$ref": "#/components/schemas/OpenAIResponseOutput"
},
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall"
},
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall"
}, },
{ {
"$ref": "#/components/schemas/OpenAIResponseInputFunctionToolCallOutput" "$ref": "#/components/schemas/OpenAIResponseInputFunctionToolCallOutput"
}, },
{
"$ref": "#/components/schemas/OpenAIResponseMCPApprovalRequest"
},
{ {
"$ref": "#/components/schemas/OpenAIResponseMCPApprovalResponse" "$ref": "#/components/schemas/OpenAIResponseMCPApprovalResponse"
}, },
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPCall"
},
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools"
},
{ {
"$ref": "#/components/schemas/OpenAIResponseMessage" "$ref": "#/components/schemas/OpenAIResponseMessage"
} }
@ -7536,6 +8194,10 @@
"type": "string", "type": "string",
"description": "(Optional) ID of the previous response in a conversation" "description": "(Optional) ID of the previous response in a conversation"
}, },
"prompt": {
"$ref": "#/components/schemas/OpenAIResponsePrompt",
"description": "(Optional) Reference to a prompt template and its variables."
},
"status": { "status": {
"type": "string", "type": "string",
"description": "Current status of the response generation" "description": "Current status of the response generation"
@ -7631,6 +8293,32 @@
} }
} }
}, },
"OpenAIResponsePrompt": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "Unique identifier of the prompt template"
},
"variables": {
"type": "object",
"additionalProperties": {
"$ref": "#/components/schemas/OpenAIResponseInputMessageContent"
},
"description": "Dictionary of variable names to OpenAIResponseInputMessageContent structure for template substitution. The substitution values can either be strings, or other Response input types like images or files."
},
"version": {
"type": "string",
"description": "Version number of the prompt to use (defaults to latest if not specified)"
}
},
"additionalProperties": false,
"required": [
"id"
],
"title": "OpenAIResponsePrompt",
"description": "OpenAI compatible Prompt object that is used in OpenAI responses."
},
"OpenAIResponseText": { "OpenAIResponseText": {
"type": "object", "type": "object",
"properties": { "properties": {
@ -8001,6 +8689,10 @@
"type": "string", "type": "string",
"description": "The underlying LLM used for completions." "description": "The underlying LLM used for completions."
}, },
"prompt": {
"$ref": "#/components/schemas/OpenAIResponsePrompt",
"description": "(Optional) Prompt object with ID, version, and variables."
},
"instructions": { "instructions": {
"type": "string" "type": "string"
}, },
@ -8089,6 +8781,10 @@
"type": "string", "type": "string",
"description": "(Optional) ID of the previous response in a conversation" "description": "(Optional) ID of the previous response in a conversation"
}, },
"prompt": {
"$ref": "#/components/schemas/OpenAIResponsePrompt",
"description": "(Optional) Reference to a prompt template and its variables."
},
"status": { "status": {
"type": "string", "type": "string",
"description": "Current status of the response generation" "description": "Current status of the response generation"
@ -11427,7 +12123,7 @@
}, },
"description": "List of documents to index in the RAG system" "description": "List of documents to index in the RAG system"
}, },
"vector_db_id": { "vector_store_id": {
"type": "string", "type": "string",
"description": "ID of the vector database to store the document embeddings" "description": "ID of the vector database to store the document embeddings"
}, },
@ -11439,7 +12135,7 @@
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"documents", "documents",
"vector_db_id", "vector_store_id",
"chunk_size_in_tokens" "chunk_size_in_tokens"
], ],
"title": "InsertRequest" "title": "InsertRequest"
@ -11630,7 +12326,7 @@
"$ref": "#/components/schemas/InterleavedContent", "$ref": "#/components/schemas/InterleavedContent",
"description": "The query content to search for in the indexed documents" "description": "The query content to search for in the indexed documents"
}, },
"vector_db_ids": { "vector_store_ids": {
"type": "array", "type": "array",
"items": { "items": {
"type": "string" "type": "string"
@ -11645,7 +12341,7 @@
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"content", "content",
"vector_db_ids" "vector_store_ids"
], ],
"title": "QueryRequest" "title": "QueryRequest"
}, },
@ -11833,6 +12529,10 @@
"$ref": "#/components/schemas/InterleavedContent", "$ref": "#/components/schemas/InterleavedContent",
"description": "The content of the chunk, which can be interleaved text, images, or other types." "description": "The content of the chunk, which can be interleaved text, images, or other types."
}, },
"chunk_id": {
"type": "string",
"description": "Unique identifier for the chunk. Must be provided explicitly."
},
"metadata": { "metadata": {
"type": "object", "type": "object",
"additionalProperties": { "additionalProperties": {
@ -11866,10 +12566,6 @@
}, },
"description": "Optional embedding for the chunk. If not provided, it will be computed later." "description": "Optional embedding for the chunk. If not provided, it will be computed later."
}, },
"stored_chunk_id": {
"type": "string",
"description": "The chunk ID that is stored in the vector database. Used for backend functionality."
},
"chunk_metadata": { "chunk_metadata": {
"$ref": "#/components/schemas/ChunkMetadata", "$ref": "#/components/schemas/ChunkMetadata",
"description": "Metadata for the chunk that will NOT be used in the context during inference. The `chunk_metadata` is required backend functionality." "description": "Metadata for the chunk that will NOT be used in the context during inference. The `chunk_metadata` is required backend functionality."
@ -11878,6 +12574,7 @@
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"content", "content",
"chunk_id",
"metadata" "metadata"
], ],
"title": "Chunk", "title": "Chunk",
@ -11938,7 +12635,7 @@
"InsertChunksRequest": { "InsertChunksRequest": {
"type": "object", "type": "object",
"properties": { "properties": {
"vector_db_id": { "vector_store_id": {
"type": "string", "type": "string",
"description": "The identifier of the vector database to insert the chunks into." "description": "The identifier of the vector database to insert the chunks into."
}, },
@ -11956,7 +12653,7 @@
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"vector_db_id", "vector_store_id",
"chunks" "chunks"
], ],
"title": "InsertChunksRequest" "title": "InsertChunksRequest"
@ -11964,7 +12661,7 @@
"QueryChunksRequest": { "QueryChunksRequest": {
"type": "object", "type": "object",
"properties": { "properties": {
"vector_db_id": { "vector_store_id": {
"type": "string", "type": "string",
"description": "The identifier of the vector database to query." "description": "The identifier of the vector database to query."
}, },
@ -12001,7 +12698,7 @@
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"vector_db_id", "vector_store_id",
"query" "query"
], ],
"title": "QueryChunksRequest" "title": "QueryChunksRequest"
@ -13224,6 +13921,11 @@
"description": "APIs for creating and interacting with agentic systems.\n\n## Responses API\n\nThe Responses API provides OpenAI-compatible functionality with enhanced capabilities for dynamic, stateful interactions.\n\n> **✅ STABLE**: This API is production-ready with backward compatibility guarantees. Recommended for production applications.\n\n### ✅ Supported Tools\n\nThe Responses API supports the following tool types:\n\n- **`web_search`**: Search the web for current information and real-time data\n- **`file_search`**: Search through uploaded files and vector stores\n - Supports dynamic `vector_store_ids` per call\n - Compatible with OpenAI file search patterns\n- **`function`**: Call custom functions with JSON schema validation\n- **`mcp_tool`**: Model Context Protocol integration\n\n### ✅ Supported Fields & Features\n\n**Core Capabilities:**\n- **Dynamic Configuration**: Switch models, vector stores, and tools per request without pre-configuration\n- **Conversation Branching**: Use `previous_response_id` to branch conversations and explore different paths\n- **Rich Annotations**: Automatic file citations, URL citations, and container file citations\n- **Status Tracking**: Monitor tool call execution status and handle failures gracefully\n\n### 🚧 Work in Progress\n\n- Full real-time response streaming support\n- `tool_choice` parameter\n- `max_tool_calls` parameter\n- Built-in tools (code interpreter, containers API)\n- Safety & guardrails\n- `reasoning` capabilities\n- `service_tier`\n- `logprobs`\n- `max_output_tokens`\n- `metadata` handling\n- `instructions`\n- `incomplete_details`\n- `background`", "description": "APIs for creating and interacting with agentic systems.\n\n## Responses API\n\nThe Responses API provides OpenAI-compatible functionality with enhanced capabilities for dynamic, stateful interactions.\n\n> **✅ STABLE**: This API is production-ready with backward compatibility guarantees. Recommended for production applications.\n\n### ✅ Supported Tools\n\nThe Responses API supports the following tool types:\n\n- **`web_search`**: Search the web for current information and real-time data\n- **`file_search`**: Search through uploaded files and vector stores\n - Supports dynamic `vector_store_ids` per call\n - Compatible with OpenAI file search patterns\n- **`function`**: Call custom functions with JSON schema validation\n- **`mcp_tool`**: Model Context Protocol integration\n\n### ✅ Supported Fields & Features\n\n**Core Capabilities:**\n- **Dynamic Configuration**: Switch models, vector stores, and tools per request without pre-configuration\n- **Conversation Branching**: Use `previous_response_id` to branch conversations and explore different paths\n- **Rich Annotations**: Automatic file citations, URL citations, and container file citations\n- **Status Tracking**: Monitor tool call execution status and handle failures gracefully\n\n### 🚧 Work in Progress\n\n- Full real-time response streaming support\n- `tool_choice` parameter\n- `max_tool_calls` parameter\n- Built-in tools (code interpreter, containers API)\n- Safety & guardrails\n- `reasoning` capabilities\n- `service_tier`\n- `logprobs`\n- `max_output_tokens`\n- `metadata` handling\n- `instructions`\n- `incomplete_details`\n- `background`",
"x-displayName": "Agents" "x-displayName": "Agents"
}, },
{
"name": "Batches",
"description": "The API is designed to allow use of openai client libraries for seamless integration.\n\nThis API provides the following extensions:\n - idempotent batch creation\n\nNote: This API is currently under active development and may undergo changes.",
"x-displayName": "The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale."
},
    {
      "name": "Conversations",
      "description": "Protocol for conversation management operations.",
@ -13297,6 +13999,7 @@
      "name": "Operations",
      "tags": [
        "Agents",
"Batches",
"Conversations", "Conversations",
"Files", "Files",
"Inference", "Inference",


@ -12,6 +12,141 @@ info:
servers:
  - url: http://any-hosted-llama-stack.com
paths:
/v1/batches:
get:
responses:
'200':
description: A list of batch objects.
content:
application/json:
schema:
$ref: '#/components/schemas/ListBatchesResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: List all batches for the current user.
description: List all batches for the current user.
parameters:
- name: after
in: query
description: >-
A cursor for pagination; returns batches after this batch ID.
required: false
schema:
type: string
- name: limit
in: query
description: >-
Number of batches to return (default 20, max 100).
required: true
schema:
type: integer
deprecated: false
post:
responses:
'200':
description: The created batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Create a new batch for processing multiple API requests.
description: >-
Create a new batch for processing multiple API requests.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CreateBatchRequest'
required: true
deprecated: false
/v1/batches/{batch_id}:
get:
responses:
'200':
description: The batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Retrieve information about a specific batch.
description: >-
Retrieve information about a specific batch.
parameters:
- name: batch_id
in: path
description: The ID of the batch to retrieve.
required: true
schema:
type: string
deprecated: false
/v1/batches/{batch_id}/cancel:
post:
responses:
'200':
description: The updated batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: Cancel a batch that is in progress.
description: Cancel a batch that is in progress.
parameters:
- name: batch_id
in: path
description: The ID of the batch to cancel.
required: true
schema:
type: string
deprecated: false
  /v1/chat/completions:
    get:
      responses:
@ -2999,6 +3134,331 @@ components:
      title: Error
      description: >-
        Error response from the API. Roughly follows RFC 7807.
ListBatchesResponse:
type: object
properties:
object:
type: string
const: list
default: list
data:
type: array
items:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
first_id:
type: string
last_id:
type: string
has_more:
type: boolean
default: false
additionalProperties: false
required:
- object
- data
- has_more
title: ListBatchesResponse
description: >-
Response containing a list of batch objects.
CreateBatchRequest:
type: object
properties:
input_file_id:
type: string
description: >-
The ID of an uploaded file containing requests for the batch.
endpoint:
type: string
description: >-
The endpoint to be used for all requests in the batch.
completion_window:
type: string
const: 24h
description: >-
The time window within which the batch should be processed.
metadata:
type: object
additionalProperties:
type: string
description: Optional metadata for the batch.
idempotency_key:
type: string
description: >-
Optional idempotency key. When provided, enables idempotent behavior.
additionalProperties: false
required:
- input_file_id
- endpoint
- completion_window
title: CreateBatchRequest
Batch:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
    Order:
      type: string
      enum:
@ -4261,11 +4721,44 @@ components:
      oneOf:
        - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
        - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage'
        - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile'
      discriminator:
        propertyName: type
        mapping:
          input_text: '#/components/schemas/OpenAIResponseInputMessageContentText'
          input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage'
input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile'
OpenAIResponseInputMessageContentFile:
type: object
properties:
type:
type: string
const: input_file
default: input_file
description: >-
The type of the input item. Always `input_file`.
file_data:
type: string
description: >-
The data of the file to be sent to the model.
file_id:
type: string
description: >-
(Optional) The ID of the file to be sent to the model.
file_url:
type: string
description: >-
The URL of the file to be sent to the model.
filename:
type: string
description: >-
The name of the file to be sent to the model.
additionalProperties: false
required:
- type
title: OpenAIResponseInputMessageContentFile
description: >-
File content for input messages in OpenAI response format.
    OpenAIResponseInputMessageContentImage:
      type: object
      properties:
@ -4286,6 +4779,10 @@
          default: input_image
          description: >-
            Content type identifier, always "input_image"
        file_id:
          type: string
          description: >-
            (Optional) The ID of the file to be sent to the model.
        image_url:
          type: string
          description: (Optional) URL of the image content
@ -5522,14 +6019,9 @@ components:
Error details for failed OpenAI response requests. Error details for failed OpenAI response requests.
OpenAIResponseInput: OpenAIResponseInput:
oneOf: oneOf:
- $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' - $ref: '#/components/schemas/OpenAIResponseOutput'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
- $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
- $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
- $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
        - $ref: '#/components/schemas/OpenAIResponseMessage'
    OpenAIResponseInputToolFileSearch:
      type: object
@ -5685,6 +6177,10 @@ components:
          type: string
          description: >-
            (Optional) ID of the previous response in a conversation
        prompt:
          $ref: '#/components/schemas/OpenAIResponsePrompt'
          description: >-
            (Optional) Reference to a prompt template and its variables.
        status:
          type: string
          description: >-
@ -5758,6 +6254,30 @@
          mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
          mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
          mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
OpenAIResponsePrompt:
type: object
properties:
id:
type: string
description: Unique identifier of the prompt template
variables:
type: object
additionalProperties:
$ref: '#/components/schemas/OpenAIResponseInputMessageContent'
description: >-
Dictionary of variable names to OpenAIResponseInputMessageContent structure
for template substitution. The substitution values can either be strings,
or other Response input types like images or files.
version:
type: string
description: >-
Version number of the prompt to use (defaults to latest if not specified)
additionalProperties: false
required:
- id
title: OpenAIResponsePrompt
description: >-
OpenAI compatible Prompt object that is used in OpenAI responses.
OpenAIResponseText: OpenAIResponseText:
type: object type: object
properties: properties:
@ -6015,6 +6535,10 @@ components:
model: model:
type: string type: string
description: The underlying LLM used for completions. description: The underlying LLM used for completions.
prompt:
$ref: '#/components/schemas/OpenAIResponsePrompt'
description: >-
(Optional) Prompt object with ID, version, and variables.
instructions: instructions:
type: string type: string
previous_response_id: previous_response_id:
@ -6092,6 +6616,10 @@ components:
type: string type: string
description: >- description: >-
(Optional) ID of the previous response in a conversation (Optional) ID of the previous response in a conversation
prompt:
$ref: '#/components/schemas/OpenAIResponsePrompt'
description: >-
(Optional) Reference to a prompt template and its variables.
status: status:
type: string type: string
description: >- description: >-
@ -8654,7 +9182,7 @@ components:
$ref: '#/components/schemas/RAGDocument' $ref: '#/components/schemas/RAGDocument'
description: >- description: >-
List of documents to index in the RAG system List of documents to index in the RAG system
vector_db_id: vector_store_id:
type: string type: string
description: >- description: >-
ID of the vector database to store the document embeddings ID of the vector database to store the document embeddings
@ -8665,7 +9193,7 @@ components:
additionalProperties: false additionalProperties: false
required: required:
- documents - documents
- vector_db_id - vector_store_id
- chunk_size_in_tokens - chunk_size_in_tokens
title: InsertRequest title: InsertRequest
DefaultRAGQueryGeneratorConfig: DefaultRAGQueryGeneratorConfig:
@ -8836,7 +9364,7 @@ components:
$ref: '#/components/schemas/InterleavedContent' $ref: '#/components/schemas/InterleavedContent'
description: >- description: >-
The query content to search for in the indexed documents The query content to search for in the indexed documents
vector_db_ids: vector_store_ids:
type: array type: array
items: items:
type: string type: string
@ -8849,7 +9377,7 @@ components:
additionalProperties: false additionalProperties: false
required: required:
- content - content
- vector_db_ids - vector_store_ids
title: QueryRequest title: QueryRequest
RAGQueryResult: RAGQueryResult:
type: object type: object
@ -8977,6 +9505,10 @@ components:
description: >- description: >-
The content of the chunk, which can be interleaved text, images, or other The content of the chunk, which can be interleaved text, images, or other
types. types.
chunk_id:
type: string
description: >-
Unique identifier for the chunk. Must be provided explicitly.
metadata: metadata:
type: object type: object
additionalProperties: additionalProperties:
@ -8997,10 +9529,6 @@ components:
description: >- description: >-
Optional embedding for the chunk. If not provided, it will be computed Optional embedding for the chunk. If not provided, it will be computed
later. later.
stored_chunk_id:
type: string
description: >-
The chunk ID that is stored in the vector database. Used for backend functionality.
chunk_metadata: chunk_metadata:
$ref: '#/components/schemas/ChunkMetadata' $ref: '#/components/schemas/ChunkMetadata'
description: >- description: >-
@ -9009,6 +9537,7 @@ components:
additionalProperties: false additionalProperties: false
required: required:
- content - content
- chunk_id
- metadata - metadata
title: Chunk title: Chunk
description: >- description: >-
@ -9073,7 +9602,7 @@ components:
InsertChunksRequest: InsertChunksRequest:
type: object type: object
properties: properties:
vector_db_id: vector_store_id:
type: string type: string
description: >- description: >-
The identifier of the vector database to insert the chunks into. The identifier of the vector database to insert the chunks into.
@ -9092,13 +9621,13 @@ components:
description: The time to live of the chunks. description: The time to live of the chunks.
additionalProperties: false additionalProperties: false
required: required:
- vector_db_id - vector_store_id
- chunks - chunks
title: InsertChunksRequest title: InsertChunksRequest
QueryChunksRequest: QueryChunksRequest:
type: object type: object
properties: properties:
vector_db_id: vector_store_id:
type: string type: string
description: >- description: >-
The identifier of the vector database to query. The identifier of the vector database to query.
@ -9118,7 +9647,7 @@ components:
description: The parameters of the query. description: The parameters of the query.
additionalProperties: false additionalProperties: false
required: required:
- vector_db_id - vector_store_id
- query - query
title: QueryChunksRequest title: QueryChunksRequest
QueryChunksResponse: QueryChunksResponse:
@ -10075,6 +10604,19 @@ tags:
- `background` - `background`
x-displayName: Agents x-displayName: Agents
- name: Batches
description: >-
The API is designed to allow use of openai client libraries for seamless integration.
This API provides the following extensions:
- idempotent batch creation
Note: This API is currently under active development and may undergo changes.
x-displayName: >-
The Batches API enables efficient processing of multiple requests in a single
operation, particularly useful for processing large datasets, batch evaluation
workflows, and cost-effective inference at scale.
- name: Conversations - name: Conversations
description: >- description: >-
Protocol for conversation management operations. Protocol for conversation management operations.
@ -10137,6 +10679,7 @@ x-tagGroups:
- name: Operations - name: Operations
tags: tags:
- Agents - Agents
- Batches
- Conversations - Conversations
- Files - Files
- Inference - Inference
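The spec changes in this file add an `input_file` content part, a `file_id` field on image parts, and an optional `prompt` template reference on the Responses request and response objects. A minimal sketch of a request exercising these fields through the OpenAI Python client pointed at a Llama Stack server follows; the base URL, model name, prompt ID, and file ID are placeholder assumptions, not values taken from this diff.

from openai import OpenAI

# Placeholders: server URL, model, prompt ID, and file ID are illustrative only.
client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

response = client.responses.create(
    model="meta-llama/Llama-3.3-70B-Instruct",
    # New: reference a stored prompt template by ID/version and bind variables.
    # Variable values may be plain text or other input content parts such as files.
    prompt={
        "id": "pmpt_contract_review",
        "version": "2",
        "variables": {"contract": {"type": "input_file", "file_id": "file-abc123"}},
    },
    input=[
        {
            "role": "user",
            "content": [
                {"type": "input_text", "text": "Summarize the attached document."},
                # New content part type added by this change.
                {"type": "input_file", "file_id": "file-abc123"},
            ],
        }
    ],
)
print(response.output_text)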
View file
@ -40,6 +40,193 @@
} }
], ],
"paths": { "paths": {
"/v1/batches": {
"get": {
"responses": {
"200": {
"description": "A list of batch objects.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/ListBatchesResponse"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "List all batches for the current user.",
"description": "List all batches for the current user.",
"parameters": [
{
"name": "after",
"in": "query",
"description": "A cursor for pagination; returns batches after this batch ID.",
"required": false,
"schema": {
"type": "string"
}
},
{
"name": "limit",
"in": "query",
"description": "Number of batches to return (default 20, max 100).",
"required": true,
"schema": {
"type": "integer"
}
}
],
"deprecated": false
},
"post": {
"responses": {
"200": {
"description": "The created batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Create a new batch for processing multiple API requests.",
"description": "Create a new batch for processing multiple API requests.",
"parameters": [],
"requestBody": {
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/CreateBatchRequest"
}
}
},
"required": true
},
"deprecated": false
}
},
"/v1/batches/{batch_id}": {
"get": {
"responses": {
"200": {
"description": "The batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Retrieve information about a specific batch.",
"description": "Retrieve information about a specific batch.",
"parameters": [
{
"name": "batch_id",
"in": "path",
"description": "The ID of the batch to retrieve.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": false
}
},
"/v1/batches/{batch_id}/cancel": {
"post": {
"responses": {
"200": {
"description": "The updated batch object.",
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/Batch"
}
}
}
},
"400": {
"$ref": "#/components/responses/BadRequest400"
},
"429": {
"$ref": "#/components/responses/TooManyRequests429"
},
"500": {
"$ref": "#/components/responses/InternalServerError500"
},
"default": {
"$ref": "#/components/responses/DefaultError"
}
},
"tags": [
"Batches"
],
"summary": "Cancel a batch that is in progress.",
"description": "Cancel a batch that is in progress.",
"parameters": [
{
"name": "batch_id",
"in": "path",
"description": "The ID of the batch to cancel.",
"required": true,
"schema": {
"type": "string"
}
}
],
"deprecated": false
}
},
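Since the Batches API is intended to be driven through OpenAI client libraries, here is a minimal sketch of the three endpoints defined above using the openai Python client against a Llama Stack server. The base URL, input file ID, and metadata are placeholders, and passing the Llama Stack-specific idempotency_key through extra_body is an assumption about how that extension reaches the server.

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

# POST /v1/batches: create a batch from a previously uploaded JSONL file of requests.
batch = client.batches.create(
    input_file_id="file-abc123",
    endpoint="/v1/chat/completions",
    completion_window="24h",
    metadata={"run": "nightly-eval"},
    # Llama Stack extension (see CreateBatchRequest below); extra_body is an
    # assumption about how the additional field is conveyed.
    extra_body={"idempotency_key": "nightly-eval-2025-10-31"},
)

# GET /v1/batches/{batch_id}: poll status and request counts.
batch = client.batches.retrieve(batch.id)
print(batch.status, batch.request_counts)

# GET /v1/batches: list batches; POST /v1/batches/{batch_id}/cancel: cancel one in progress.
print([b.id for b in client.batches.list(limit=20)])
client.batches.cancel(batch.id)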
"/v1/chat/completions": { "/v1/chat/completions": {
"get": { "get": {
"responses": { "responses": {
@ -5677,6 +5864,451 @@
"title": "Error", "title": "Error",
"description": "Error response from the API. Roughly follows RFC 7807." "description": "Error response from the API. Roughly follows RFC 7807."
}, },
"ListBatchesResponse": {
"type": "object",
"properties": {
"object": {
"type": "string",
"const": "list",
"default": "list"
},
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"completion_window": {
"type": "string"
},
"created_at": {
"type": "integer"
},
"endpoint": {
"type": "string"
},
"input_file_id": {
"type": "string"
},
"object": {
"type": "string",
"const": "batch"
},
"status": {
"type": "string",
"enum": [
"validating",
"failed",
"in_progress",
"finalizing",
"completed",
"expired",
"cancelling",
"cancelled"
]
},
"cancelled_at": {
"type": "integer"
},
"cancelling_at": {
"type": "integer"
},
"completed_at": {
"type": "integer"
},
"error_file_id": {
"type": "string"
},
"errors": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string"
},
"line": {
"type": "integer"
},
"message": {
"type": "string"
},
"param": {
"type": "string"
}
},
"additionalProperties": false,
"title": "BatchError"
}
},
"object": {
"type": "string"
}
},
"additionalProperties": false,
"title": "Errors"
},
"expired_at": {
"type": "integer"
},
"expires_at": {
"type": "integer"
},
"failed_at": {
"type": "integer"
},
"finalizing_at": {
"type": "integer"
},
"in_progress_at": {
"type": "integer"
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"model": {
"type": "string"
},
"output_file_id": {
"type": "string"
},
"request_counts": {
"type": "object",
"properties": {
"completed": {
"type": "integer"
},
"failed": {
"type": "integer"
},
"total": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"completed",
"failed",
"total"
],
"title": "BatchRequestCounts"
},
"usage": {
"type": "object",
"properties": {
"input_tokens": {
"type": "integer"
},
"input_tokens_details": {
"type": "object",
"properties": {
"cached_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"cached_tokens"
],
"title": "InputTokensDetails"
},
"output_tokens": {
"type": "integer"
},
"output_tokens_details": {
"type": "object",
"properties": {
"reasoning_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"reasoning_tokens"
],
"title": "OutputTokensDetails"
},
"total_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"input_tokens",
"input_tokens_details",
"output_tokens",
"output_tokens_details",
"total_tokens"
],
"title": "BatchUsage"
}
},
"additionalProperties": false,
"required": [
"id",
"completion_window",
"created_at",
"endpoint",
"input_file_id",
"object",
"status"
],
"title": "Batch"
}
},
"first_id": {
"type": "string"
},
"last_id": {
"type": "string"
},
"has_more": {
"type": "boolean",
"default": false
}
},
"additionalProperties": false,
"required": [
"object",
"data",
"has_more"
],
"title": "ListBatchesResponse",
"description": "Response containing a list of batch objects."
},
"CreateBatchRequest": {
"type": "object",
"properties": {
"input_file_id": {
"type": "string",
"description": "The ID of an uploaded file containing requests for the batch."
},
"endpoint": {
"type": "string",
"description": "The endpoint to be used for all requests in the batch."
},
"completion_window": {
"type": "string",
"const": "24h",
"description": "The time window within which the batch should be processed."
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
},
"description": "Optional metadata for the batch."
},
"idempotency_key": {
"type": "string",
"description": "Optional idempotency key. When provided, enables idempotent behavior."
}
},
"additionalProperties": false,
"required": [
"input_file_id",
"endpoint",
"completion_window"
],
"title": "CreateBatchRequest"
},
"Batch": {
"type": "object",
"properties": {
"id": {
"type": "string"
},
"completion_window": {
"type": "string"
},
"created_at": {
"type": "integer"
},
"endpoint": {
"type": "string"
},
"input_file_id": {
"type": "string"
},
"object": {
"type": "string",
"const": "batch"
},
"status": {
"type": "string",
"enum": [
"validating",
"failed",
"in_progress",
"finalizing",
"completed",
"expired",
"cancelling",
"cancelled"
]
},
"cancelled_at": {
"type": "integer"
},
"cancelling_at": {
"type": "integer"
},
"completed_at": {
"type": "integer"
},
"error_file_id": {
"type": "string"
},
"errors": {
"type": "object",
"properties": {
"data": {
"type": "array",
"items": {
"type": "object",
"properties": {
"code": {
"type": "string"
},
"line": {
"type": "integer"
},
"message": {
"type": "string"
},
"param": {
"type": "string"
}
},
"additionalProperties": false,
"title": "BatchError"
}
},
"object": {
"type": "string"
}
},
"additionalProperties": false,
"title": "Errors"
},
"expired_at": {
"type": "integer"
},
"expires_at": {
"type": "integer"
},
"failed_at": {
"type": "integer"
},
"finalizing_at": {
"type": "integer"
},
"in_progress_at": {
"type": "integer"
},
"metadata": {
"type": "object",
"additionalProperties": {
"type": "string"
}
},
"model": {
"type": "string"
},
"output_file_id": {
"type": "string"
},
"request_counts": {
"type": "object",
"properties": {
"completed": {
"type": "integer"
},
"failed": {
"type": "integer"
},
"total": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"completed",
"failed",
"total"
],
"title": "BatchRequestCounts"
},
"usage": {
"type": "object",
"properties": {
"input_tokens": {
"type": "integer"
},
"input_tokens_details": {
"type": "object",
"properties": {
"cached_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"cached_tokens"
],
"title": "InputTokensDetails"
},
"output_tokens": {
"type": "integer"
},
"output_tokens_details": {
"type": "object",
"properties": {
"reasoning_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"reasoning_tokens"
],
"title": "OutputTokensDetails"
},
"total_tokens": {
"type": "integer"
}
},
"additionalProperties": false,
"required": [
"input_tokens",
"input_tokens_details",
"output_tokens",
"output_tokens_details",
"total_tokens"
],
"title": "BatchUsage"
}
},
"additionalProperties": false,
"required": [
"id",
"completion_window",
"created_at",
"endpoint",
"input_file_id",
"object",
"status"
],
"title": "Batch"
},
"Order": { "Order": {
"type": "string", "type": "string",
"enum": [ "enum": [
@ -7368,16 +8000,53 @@
}, },
{ {
"$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage" "$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage"
},
{
"$ref": "#/components/schemas/OpenAIResponseInputMessageContentFile"
} }
], ],
"discriminator": { "discriminator": {
"propertyName": "type", "propertyName": "type",
"mapping": { "mapping": {
"input_text": "#/components/schemas/OpenAIResponseInputMessageContentText", "input_text": "#/components/schemas/OpenAIResponseInputMessageContentText",
"input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage" "input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage",
"input_file": "#/components/schemas/OpenAIResponseInputMessageContentFile"
} }
} }
}, },
"OpenAIResponseInputMessageContentFile": {
"type": "object",
"properties": {
"type": {
"type": "string",
"const": "input_file",
"default": "input_file",
"description": "The type of the input item. Always `input_file`."
},
"file_data": {
"type": "string",
"description": "The data of the file to be sent to the model."
},
"file_id": {
"type": "string",
"description": "(Optional) The ID of the file to be sent to the model."
},
"file_url": {
"type": "string",
"description": "The URL of the file to be sent to the model."
},
"filename": {
"type": "string",
"description": "The name of the file to be sent to the model."
}
},
"additionalProperties": false,
"required": [
"type"
],
"title": "OpenAIResponseInputMessageContentFile",
"description": "File content for input messages in OpenAI response format."
},
"OpenAIResponseInputMessageContentImage": { "OpenAIResponseInputMessageContentImage": {
"type": "object", "type": "object",
"properties": { "properties": {
@ -7405,6 +8074,10 @@
"default": "input_image", "default": "input_image",
"description": "Content type identifier, always \"input_image\"" "description": "Content type identifier, always \"input_image\""
}, },
"file_id": {
"type": "string",
"description": "(Optional) The ID of the file to be sent to the model."
},
"image_url": { "image_url": {
"type": "string", "type": "string",
"description": "(Optional) URL of the image content" "description": "(Optional) URL of the image content"
@ -8977,29 +9650,14 @@
"OpenAIResponseInput": { "OpenAIResponseInput": {
"oneOf": [ "oneOf": [
{ {
"$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall" "$ref": "#/components/schemas/OpenAIResponseOutput"
},
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall"
},
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall"
}, },
{ {
"$ref": "#/components/schemas/OpenAIResponseInputFunctionToolCallOutput" "$ref": "#/components/schemas/OpenAIResponseInputFunctionToolCallOutput"
}, },
{
"$ref": "#/components/schemas/OpenAIResponseMCPApprovalRequest"
},
{ {
"$ref": "#/components/schemas/OpenAIResponseMCPApprovalResponse" "$ref": "#/components/schemas/OpenAIResponseMCPApprovalResponse"
}, },
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPCall"
},
{
"$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools"
},
{ {
"$ref": "#/components/schemas/OpenAIResponseMessage" "$ref": "#/components/schemas/OpenAIResponseMessage"
} }
@ -9208,6 +9866,10 @@
"type": "string", "type": "string",
"description": "(Optional) ID of the previous response in a conversation" "description": "(Optional) ID of the previous response in a conversation"
}, },
"prompt": {
"$ref": "#/components/schemas/OpenAIResponsePrompt",
"description": "(Optional) Reference to a prompt template and its variables."
},
"status": { "status": {
"type": "string", "type": "string",
"description": "Current status of the response generation" "description": "Current status of the response generation"
@ -9303,6 +9965,32 @@
} }
} }
}, },
"OpenAIResponsePrompt": {
"type": "object",
"properties": {
"id": {
"type": "string",
"description": "Unique identifier of the prompt template"
},
"variables": {
"type": "object",
"additionalProperties": {
"$ref": "#/components/schemas/OpenAIResponseInputMessageContent"
},
"description": "Dictionary of variable names to OpenAIResponseInputMessageContent structure for template substitution. The substitution values can either be strings, or other Response input types like images or files."
},
"version": {
"type": "string",
"description": "Version number of the prompt to use (defaults to latest if not specified)"
}
},
"additionalProperties": false,
"required": [
"id"
],
"title": "OpenAIResponsePrompt",
"description": "OpenAI compatible Prompt object that is used in OpenAI responses."
},
"OpenAIResponseText": { "OpenAIResponseText": {
"type": "object", "type": "object",
"properties": { "properties": {
@ -9673,6 +10361,10 @@
"type": "string", "type": "string",
"description": "The underlying LLM used for completions." "description": "The underlying LLM used for completions."
}, },
"prompt": {
"$ref": "#/components/schemas/OpenAIResponsePrompt",
"description": "(Optional) Prompt object with ID, version, and variables."
},
"instructions": { "instructions": {
"type": "string" "type": "string"
}, },
@ -9761,6 +10453,10 @@
"type": "string", "type": "string",
"description": "(Optional) ID of the previous response in a conversation" "description": "(Optional) ID of the previous response in a conversation"
}, },
"prompt": {
"$ref": "#/components/schemas/OpenAIResponsePrompt",
"description": "(Optional) Reference to a prompt template and its variables."
},
"status": { "status": {
"type": "string", "type": "string",
"description": "Current status of the response generation" "description": "Current status of the response generation"
@ -13099,7 +13795,7 @@
}, },
"description": "List of documents to index in the RAG system" "description": "List of documents to index in the RAG system"
}, },
"vector_db_id": { "vector_store_id": {
"type": "string", "type": "string",
"description": "ID of the vector database to store the document embeddings" "description": "ID of the vector database to store the document embeddings"
}, },
@ -13111,7 +13807,7 @@
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"documents", "documents",
"vector_db_id", "vector_store_id",
"chunk_size_in_tokens" "chunk_size_in_tokens"
], ],
"title": "InsertRequest" "title": "InsertRequest"
@ -13302,7 +13998,7 @@
"$ref": "#/components/schemas/InterleavedContent", "$ref": "#/components/schemas/InterleavedContent",
"description": "The query content to search for in the indexed documents" "description": "The query content to search for in the indexed documents"
}, },
"vector_db_ids": { "vector_store_ids": {
"type": "array", "type": "array",
"items": { "items": {
"type": "string" "type": "string"
@ -13317,7 +14013,7 @@
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"content", "content",
"vector_db_ids" "vector_store_ids"
], ],
"title": "QueryRequest" "title": "QueryRequest"
}, },
@ -13505,6 +14201,10 @@
"$ref": "#/components/schemas/InterleavedContent", "$ref": "#/components/schemas/InterleavedContent",
"description": "The content of the chunk, which can be interleaved text, images, or other types." "description": "The content of the chunk, which can be interleaved text, images, or other types."
}, },
"chunk_id": {
"type": "string",
"description": "Unique identifier for the chunk. Must be provided explicitly."
},
"metadata": { "metadata": {
"type": "object", "type": "object",
"additionalProperties": { "additionalProperties": {
@ -13538,10 +14238,6 @@
}, },
"description": "Optional embedding for the chunk. If not provided, it will be computed later." "description": "Optional embedding for the chunk. If not provided, it will be computed later."
}, },
"stored_chunk_id": {
"type": "string",
"description": "The chunk ID that is stored in the vector database. Used for backend functionality."
},
"chunk_metadata": { "chunk_metadata": {
"$ref": "#/components/schemas/ChunkMetadata", "$ref": "#/components/schemas/ChunkMetadata",
"description": "Metadata for the chunk that will NOT be used in the context during inference. The `chunk_metadata` is required backend functionality." "description": "Metadata for the chunk that will NOT be used in the context during inference. The `chunk_metadata` is required backend functionality."
@ -13550,6 +14246,7 @@
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"content", "content",
"chunk_id",
"metadata" "metadata"
], ],
"title": "Chunk", "title": "Chunk",
@ -13610,7 +14307,7 @@
"InsertChunksRequest": { "InsertChunksRequest": {
"type": "object", "type": "object",
"properties": { "properties": {
"vector_db_id": { "vector_store_id": {
"type": "string", "type": "string",
"description": "The identifier of the vector database to insert the chunks into." "description": "The identifier of the vector database to insert the chunks into."
}, },
@ -13628,7 +14325,7 @@
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"vector_db_id", "vector_store_id",
"chunks" "chunks"
], ],
"title": "InsertChunksRequest" "title": "InsertChunksRequest"
@ -13636,7 +14333,7 @@
"QueryChunksRequest": { "QueryChunksRequest": {
"type": "object", "type": "object",
"properties": { "properties": {
"vector_db_id": { "vector_store_id": {
"type": "string", "type": "string",
"description": "The identifier of the vector database to query." "description": "The identifier of the vector database to query."
}, },
@ -13673,7 +14370,7 @@
}, },
"additionalProperties": false, "additionalProperties": false,
"required": [ "required": [
"vector_db_id", "vector_store_id",
"query" "query"
], ],
"title": "QueryChunksRequest" "title": "QueryChunksRequest"
@ -15452,7 +16149,6 @@
}, },
"max_tokens": { "max_tokens": {
"type": "integer", "type": "integer",
"default": 0,
"description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length." "description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
}, },
"repetition_penalty": { "repetition_penalty": {
@ -15735,7 +16431,7 @@
"const": "memory_retrieval", "const": "memory_retrieval",
"default": "memory_retrieval" "default": "memory_retrieval"
}, },
"vector_db_ids": { "vector_store_ids": {
"type": "string", "type": "string",
"description": "The IDs of the vector databases to retrieve context from." "description": "The IDs of the vector databases to retrieve context from."
}, },
@ -15749,7 +16445,7 @@
"turn_id", "turn_id",
"step_id", "step_id",
"step_type", "step_type",
"vector_db_ids", "vector_store_ids",
"inserted_context" "inserted_context"
], ],
"title": "MemoryRetrievalStep", "title": "MemoryRetrievalStep",
@ -17897,6 +18593,11 @@
"description": "APIs for creating and interacting with agentic systems.", "description": "APIs for creating and interacting with agentic systems.",
"x-displayName": "Agents" "x-displayName": "Agents"
}, },
{
"name": "Batches",
"description": "The API is designed to allow use of openai client libraries for seamless integration.\n\nThis API provides the following extensions:\n - idempotent batch creation\n\nNote: This API is currently under active development and may undergo changes.",
"x-displayName": "The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale."
},
{ {
"name": "Benchmarks", "name": "Benchmarks",
"description": "" "description": ""
@ -17991,6 +18692,7 @@
"name": "Operations", "name": "Operations",
"tags": [ "tags": [
"Agents", "Agents",
"Batches",
"Benchmarks", "Benchmarks",
"Conversations", "Conversations",
View file

@ -15,6 +15,141 @@ info:
servers: servers:
- url: http://any-hosted-llama-stack.com - url: http://any-hosted-llama-stack.com
paths: paths:
/v1/batches:
get:
responses:
'200':
description: A list of batch objects.
content:
application/json:
schema:
$ref: '#/components/schemas/ListBatchesResponse'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: List all batches for the current user.
description: List all batches for the current user.
parameters:
- name: after
in: query
description: >-
A cursor for pagination; returns batches after this batch ID.
required: false
schema:
type: string
- name: limit
in: query
description: >-
Number of batches to return (default 20, max 100).
required: true
schema:
type: integer
deprecated: false
post:
responses:
'200':
description: The created batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Create a new batch for processing multiple API requests.
description: >-
Create a new batch for processing multiple API requests.
parameters: []
requestBody:
content:
application/json:
schema:
$ref: '#/components/schemas/CreateBatchRequest'
required: true
deprecated: false
/v1/batches/{batch_id}:
get:
responses:
'200':
description: The batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: >-
Retrieve information about a specific batch.
description: >-
Retrieve information about a specific batch.
parameters:
- name: batch_id
in: path
description: The ID of the batch to retrieve.
required: true
schema:
type: string
deprecated: false
/v1/batches/{batch_id}/cancel:
post:
responses:
'200':
description: The updated batch object.
content:
application/json:
schema:
$ref: '#/components/schemas/Batch'
'400':
$ref: '#/components/responses/BadRequest400'
'429':
$ref: >-
#/components/responses/TooManyRequests429
'500':
$ref: >-
#/components/responses/InternalServerError500
default:
$ref: '#/components/responses/DefaultError'
tags:
- Batches
summary: Cancel a batch that is in progress.
description: Cancel a batch that is in progress.
parameters:
- name: batch_id
in: path
description: The ID of the batch to cancel.
required: true
schema:
type: string
deprecated: false
/v1/chat/completions: /v1/chat/completions:
get: get:
responses: responses:
@ -4212,6 +4347,331 @@ components:
title: Error title: Error
description: >- description: >-
Error response from the API. Roughly follows RFC 7807. Error response from the API. Roughly follows RFC 7807.
ListBatchesResponse:
type: object
properties:
object:
type: string
const: list
default: list
data:
type: array
items:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
first_id:
type: string
last_id:
type: string
has_more:
type: boolean
default: false
additionalProperties: false
required:
- object
- data
- has_more
title: ListBatchesResponse
description: >-
Response containing a list of batch objects.
CreateBatchRequest:
type: object
properties:
input_file_id:
type: string
description: >-
The ID of an uploaded file containing requests for the batch.
endpoint:
type: string
description: >-
The endpoint to be used for all requests in the batch.
completion_window:
type: string
const: 24h
description: >-
The time window within which the batch should be processed.
metadata:
type: object
additionalProperties:
type: string
description: Optional metadata for the batch.
idempotency_key:
type: string
description: >-
Optional idempotency key. When provided, enables idempotent behavior.
additionalProperties: false
required:
- input_file_id
- endpoint
- completion_window
title: CreateBatchRequest
Batch:
type: object
properties:
id:
type: string
completion_window:
type: string
created_at:
type: integer
endpoint:
type: string
input_file_id:
type: string
object:
type: string
const: batch
status:
type: string
enum:
- validating
- failed
- in_progress
- finalizing
- completed
- expired
- cancelling
- cancelled
cancelled_at:
type: integer
cancelling_at:
type: integer
completed_at:
type: integer
error_file_id:
type: string
errors:
type: object
properties:
data:
type: array
items:
type: object
properties:
code:
type: string
line:
type: integer
message:
type: string
param:
type: string
additionalProperties: false
title: BatchError
object:
type: string
additionalProperties: false
title: Errors
expired_at:
type: integer
expires_at:
type: integer
failed_at:
type: integer
finalizing_at:
type: integer
in_progress_at:
type: integer
metadata:
type: object
additionalProperties:
type: string
model:
type: string
output_file_id:
type: string
request_counts:
type: object
properties:
completed:
type: integer
failed:
type: integer
total:
type: integer
additionalProperties: false
required:
- completed
- failed
- total
title: BatchRequestCounts
usage:
type: object
properties:
input_tokens:
type: integer
input_tokens_details:
type: object
properties:
cached_tokens:
type: integer
additionalProperties: false
required:
- cached_tokens
title: InputTokensDetails
output_tokens:
type: integer
output_tokens_details:
type: object
properties:
reasoning_tokens:
type: integer
additionalProperties: false
required:
- reasoning_tokens
title: OutputTokensDetails
total_tokens:
type: integer
additionalProperties: false
required:
- input_tokens
- input_tokens_details
- output_tokens
- output_tokens_details
- total_tokens
title: BatchUsage
additionalProperties: false
required:
- id
- completion_window
- created_at
- endpoint
- input_file_id
- object
- status
title: Batch
Order: Order:
type: string type: string
enum: enum:
@ -5474,11 +5934,44 @@ components:
oneOf: oneOf:
- $ref: '#/components/schemas/OpenAIResponseInputMessageContentText' - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
- $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage' - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage'
- $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile'
discriminator: discriminator:
propertyName: type propertyName: type
mapping: mapping:
input_text: '#/components/schemas/OpenAIResponseInputMessageContentText' input_text: '#/components/schemas/OpenAIResponseInputMessageContentText'
input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage' input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage'
input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile'
OpenAIResponseInputMessageContentFile:
type: object
properties:
type:
type: string
const: input_file
default: input_file
description: >-
The type of the input item. Always `input_file`.
file_data:
type: string
description: >-
The data of the file to be sent to the model.
file_id:
type: string
description: >-
(Optional) The ID of the file to be sent to the model.
file_url:
type: string
description: >-
The URL of the file to be sent to the model.
filename:
type: string
description: >-
The name of the file to be sent to the model.
additionalProperties: false
required:
- type
title: OpenAIResponseInputMessageContentFile
description: >-
File content for input messages in OpenAI response format.
OpenAIResponseInputMessageContentImage: OpenAIResponseInputMessageContentImage:
type: object type: object
properties: properties:
@ -5499,6 +5992,10 @@ components:
default: input_image default: input_image
description: >- description: >-
Content type identifier, always "input_image" Content type identifier, always "input_image"
file_id:
type: string
description: >-
(Optional) The ID of the file to be sent to the model.
image_url: image_url:
type: string type: string
description: (Optional) URL of the image content description: (Optional) URL of the image content
@ -6735,14 +7232,9 @@ components:
Error details for failed OpenAI response requests. Error details for failed OpenAI response requests.
OpenAIResponseInput: OpenAIResponseInput:
oneOf: oneOf:
- $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall' - $ref: '#/components/schemas/OpenAIResponseOutput'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
- $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput' - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
- $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
- $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse' - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
- $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
- $ref: '#/components/schemas/OpenAIResponseMessage' - $ref: '#/components/schemas/OpenAIResponseMessage'
OpenAIResponseInputToolFileSearch: OpenAIResponseInputToolFileSearch:
type: object type: object
@ -6898,6 +7390,10 @@ components:
type: string type: string
description: >- description: >-
(Optional) ID of the previous response in a conversation (Optional) ID of the previous response in a conversation
prompt:
$ref: '#/components/schemas/OpenAIResponsePrompt'
description: >-
(Optional) Reference to a prompt template and its variables.
status: status:
type: string type: string
description: >- description: >-
@ -6971,6 +7467,30 @@ components:
mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall' mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools' mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest' mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
OpenAIResponsePrompt:
type: object
properties:
id:
type: string
description: Unique identifier of the prompt template
variables:
type: object
additionalProperties:
$ref: '#/components/schemas/OpenAIResponseInputMessageContent'
description: >-
Dictionary of variable names to OpenAIResponseInputMessageContent structure
for template substitution. The substitution values can either be strings,
or other Response input types like images or files.
version:
type: string
description: >-
Version number of the prompt to use (defaults to latest if not specified)
additionalProperties: false
required:
- id
title: OpenAIResponsePrompt
description: >-
OpenAI compatible Prompt object that is used in OpenAI responses.
OpenAIResponseText: OpenAIResponseText:
type: object type: object
properties: properties:
@ -7228,6 +7748,10 @@ components:
model: model:
type: string type: string
description: The underlying LLM used for completions. description: The underlying LLM used for completions.
prompt:
$ref: '#/components/schemas/OpenAIResponsePrompt'
description: >-
(Optional) Prompt object with ID, version, and variables.
instructions: instructions:
type: string type: string
previous_response_id: previous_response_id:
@ -7305,6 +7829,10 @@ components:
type: string type: string
description: >- description: >-
(Optional) ID of the previous response in a conversation (Optional) ID of the previous response in a conversation
prompt:
$ref: '#/components/schemas/OpenAIResponsePrompt'
description: >-
(Optional) Reference to a prompt template and its variables.
status: status:
type: string type: string
description: >- description: >-
@ -9867,7 +10395,7 @@ components:
$ref: '#/components/schemas/RAGDocument' $ref: '#/components/schemas/RAGDocument'
description: >- description: >-
List of documents to index in the RAG system List of documents to index in the RAG system
vector_db_id: vector_store_id:
type: string type: string
description: >- description: >-
ID of the vector database to store the document embeddings ID of the vector database to store the document embeddings
@ -9878,7 +10406,7 @@ components:
additionalProperties: false additionalProperties: false
required: required:
- documents - documents
- vector_db_id - vector_store_id
- chunk_size_in_tokens - chunk_size_in_tokens
title: InsertRequest title: InsertRequest
DefaultRAGQueryGeneratorConfig: DefaultRAGQueryGeneratorConfig:
@ -10049,7 +10577,7 @@ components:
$ref: '#/components/schemas/InterleavedContent' $ref: '#/components/schemas/InterleavedContent'
description: >- description: >-
The query content to search for in the indexed documents The query content to search for in the indexed documents
vector_db_ids: vector_store_ids:
type: array type: array
items: items:
type: string type: string
@ -10062,7 +10590,7 @@ components:
additionalProperties: false additionalProperties: false
required: required:
- content - content
- vector_db_ids - vector_store_ids
title: QueryRequest title: QueryRequest
RAGQueryResult: RAGQueryResult:
type: object type: object
@ -10190,6 +10718,10 @@ components:
description: >- description: >-
The content of the chunk, which can be interleaved text, images, or other The content of the chunk, which can be interleaved text, images, or other
types. types.
chunk_id:
type: string
description: >-
Unique identifier for the chunk. Must be provided explicitly.
metadata: metadata:
type: object type: object
additionalProperties: additionalProperties:
@ -10210,10 +10742,6 @@ components:
description: >- description: >-
Optional embedding for the chunk. If not provided, it will be computed Optional embedding for the chunk. If not provided, it will be computed
later. later.
stored_chunk_id:
type: string
description: >-
The chunk ID that is stored in the vector database. Used for backend functionality.
chunk_metadata: chunk_metadata:
$ref: '#/components/schemas/ChunkMetadata' $ref: '#/components/schemas/ChunkMetadata'
description: >- description: >-
@ -10222,6 +10750,7 @@ components:
additionalProperties: false additionalProperties: false
required: required:
- content - content
- chunk_id
- metadata - metadata
title: Chunk title: Chunk
description: >- description: >-
@ -10286,7 +10815,7 @@ components:
InsertChunksRequest: InsertChunksRequest:
type: object type: object
properties: properties:
vector_db_id: vector_store_id:
type: string type: string
description: >- description: >-
The identifier of the vector database to insert the chunks into. The identifier of the vector database to insert the chunks into.
@ -10305,13 +10834,13 @@ components:
description: The time to live of the chunks. description: The time to live of the chunks.
additionalProperties: false additionalProperties: false
required: required:
- vector_db_id - vector_store_id
- chunks - chunks
title: InsertChunksRequest title: InsertChunksRequest
QueryChunksRequest: QueryChunksRequest:
type: object type: object
properties: properties:
vector_db_id: vector_store_id:
type: string type: string
description: >- description: >-
The identifier of the vector database to query. The identifier of the vector database to query.
@ -10331,7 +10860,7 @@ components:
description: The parameters of the query. description: The parameters of the query.
additionalProperties: false additionalProperties: false
required: required:
- vector_db_id - vector_store_id
- query - query
title: QueryChunksRequest title: QueryChunksRequest
QueryChunksResponse: QueryChunksResponse:
@ -11600,7 +12129,6 @@ components:
description: The sampling strategy. description: The sampling strategy.
max_tokens: max_tokens:
type: integer type: integer
default: 0
description: >- description: >-
The maximum number of tokens that can be generated in the completion. The maximum number of tokens that can be generated in the completion.
The token count of your prompt plus max_tokens cannot exceed the model's The token count of your prompt plus max_tokens cannot exceed the model's
@ -11850,7 +12378,7 @@ components:
description: Type of the step in an agent turn. description: Type of the step in an agent turn.
const: memory_retrieval const: memory_retrieval
default: memory_retrieval default: memory_retrieval
vector_db_ids: vector_store_ids:
type: string type: string
description: >- description: >-
The IDs of the vector databases to retrieve context from. The IDs of the vector databases to retrieve context from.
@ -11863,7 +12391,7 @@ components:
- turn_id - turn_id
- step_id - step_id
- step_type - step_type
- vector_db_ids - vector_store_ids
- inserted_context - inserted_context
title: MemoryRetrievalStep title: MemoryRetrievalStep
description: >- description: >-
@ -13460,6 +13988,19 @@ tags:
description: >- description: >-
APIs for creating and interacting with agentic systems. APIs for creating and interacting with agentic systems.
x-displayName: Agents x-displayName: Agents
- name: Batches
description: >-
The API is designed to allow use of openai client libraries for seamless integration.
This API provides the following extensions:
- idempotent batch creation
Note: This API is currently under active development and may undergo changes.
x-displayName: >-
The Batches API enables efficient processing of multiple requests in a single
operation, particularly useful for processing large datasets, batch evaluation
workflows, and cost-effective inference at scale.
- name: Benchmarks - name: Benchmarks
description: '' description: ''
- name: Conversations - name: Conversations
@ -13534,6 +14075,7 @@ x-tagGroups:
- name: Operations - name: Operations
tags: tags:
- Agents - Agents
- Batches
- Benchmarks - Benchmarks
- Conversations - Conversations
- DatasetIO - DatasetIO
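The RAG and vector-io schemas above rename vector_db_id/vector_db_ids to vector_store_id/vector_store_ids and make chunk_id a required, caller-supplied field in place of the old stored_chunk_id. A minimal sketch of request payloads shaped to the updated InsertChunksRequest and QueryChunksRequest schemas follows; the store ID, metadata values, and query params are placeholder assumptions.

import uuid

chunk = {
    "content": "Llama Stack exposes OpenAI-compatible APIs.",
    "chunk_id": str(uuid.uuid4()),  # now required; no longer derived server-side
    "metadata": {"document_id": "doc-001", "source": "handbook.md"},
}

insert_chunks_request = {
    "vector_store_id": "vs_demo",   # formerly vector_db_id
    "chunks": [chunk],
}

query_chunks_request = {
    "vector_store_id": "vs_demo",
    "query": "Which APIs does Llama Stack expose?",
    "params": {"max_chunks": 5},    # assumption: provider-defined query parameters
}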
View file
@ -1,7 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from .telemetry import *
View file
@ -1,250 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
import os
import threading
from typing import Any
from opentelemetry import metrics, trace
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator
from llama_stack.apis.telemetry import (
Event,
MetricEvent,
SpanEndPayload,
SpanStartPayload,
SpanStatus,
StructuredLogEvent,
UnstructuredLogEvent,
)
from llama_stack.apis.telemetry import (
Telemetry as TelemetryBase,
)
from llama_stack.core.telemetry.tracing import ROOT_SPAN_MARKERS
from llama_stack.log import get_logger
_GLOBAL_STORAGE: dict[str, dict[str | int, Any]] = {
"active_spans": {},
"counters": {},
"gauges": {},
"up_down_counters": {},
}
_global_lock = threading.Lock()
_TRACER_PROVIDER = None
logger = get_logger(name=__name__, category="telemetry")
def is_tracing_enabled(tracer):
with tracer.start_as_current_span("check_tracing") as span:
return span.is_recording()
class Telemetry(TelemetryBase):
def __init__(self) -> None:
self.meter = None
global _TRACER_PROVIDER
# Initialize the correct span processor based on the provider state.
# This is needed since once the span processor is set, it cannot be unset.
# Recreating the telemetry adapter multiple times will result in duplicate span processors.
# Since the library client can be recreated multiple times in a notebook,
# the kernel will hold on to the span processor and cause duplicate spans to be written.
if os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT"):
if _TRACER_PROVIDER is None:
provider = TracerProvider()
trace.set_tracer_provider(provider)
_TRACER_PROVIDER = provider
# Use single OTLP endpoint for all telemetry signals
# Let OpenTelemetry SDK handle endpoint construction automatically
# The SDK will read OTEL_EXPORTER_OTLP_ENDPOINT and construct appropriate URLs
# https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter
span_exporter = OTLPSpanExporter()
span_processor = BatchSpanProcessor(span_exporter)
trace.get_tracer_provider().add_span_processor(span_processor)
metric_reader = PeriodicExportingMetricReader(OTLPMetricExporter())
metric_provider = MeterProvider(metric_readers=[metric_reader])
metrics.set_meter_provider(metric_provider)
self.is_otel_endpoint_set = True
else:
logger.warning("OTEL_EXPORTER_OTLP_ENDPOINT is not set, skipping telemetry")
self.is_otel_endpoint_set = False
self.meter = metrics.get_meter(__name__)
self._lock = _global_lock
async def initialize(self) -> None:
pass
async def shutdown(self) -> None:
if self.is_otel_endpoint_set:
trace.get_tracer_provider().force_flush()
async def log_event(self, event: Event, ttl_seconds: int = 604800) -> None:
if isinstance(event, UnstructuredLogEvent):
self._log_unstructured(event, ttl_seconds)
elif isinstance(event, MetricEvent):
self._log_metric(event)
elif isinstance(event, StructuredLogEvent):
self._log_structured(event, ttl_seconds)
else:
raise ValueError(f"Unknown event type: {event}")
def _log_unstructured(self, event: UnstructuredLogEvent, ttl_seconds: int) -> None:
with self._lock:
# Use global storage instead of instance storage
span_id = int(event.span_id, 16)
span = _GLOBAL_STORAGE["active_spans"].get(span_id)
if span:
timestamp_ns = int(event.timestamp.timestamp() * 1e9)
span.add_event(
name=event.type.value,
attributes={
"message": event.message,
"severity": event.severity.value,
"__ttl__": ttl_seconds,
**(event.attributes or {}),
},
timestamp=timestamp_ns,
)
else:
print(f"Warning: No active span found for span_id {span_id}. Dropping event: {event}")
def _get_or_create_counter(self, name: str, unit: str) -> metrics.Counter:
assert self.meter is not None
if name not in _GLOBAL_STORAGE["counters"]:
_GLOBAL_STORAGE["counters"][name] = self.meter.create_counter(
name=name,
unit=unit,
description=f"Counter for {name}",
)
return _GLOBAL_STORAGE["counters"][name]
def _get_or_create_gauge(self, name: str, unit: str) -> metrics.ObservableGauge:
assert self.meter is not None
if name not in _GLOBAL_STORAGE["gauges"]:
_GLOBAL_STORAGE["gauges"][name] = self.meter.create_gauge(
name=name,
unit=unit,
description=f"Gauge for {name}",
)
return _GLOBAL_STORAGE["gauges"][name]
def _log_metric(self, event: MetricEvent) -> None:
# Add metric as an event to the current span
try:
with self._lock:
# Only try to add to span if we have a valid span_id
if event.span_id:
try:
span_id = int(event.span_id, 16)
span = _GLOBAL_STORAGE["active_spans"].get(span_id)
if span:
timestamp_ns = int(event.timestamp.timestamp() * 1e9)
span.add_event(
name=f"metric.{event.metric}",
attributes={
"value": event.value,
"unit": event.unit,
**(event.attributes or {}),
},
timestamp=timestamp_ns,
)
except (ValueError, KeyError):
# Invalid span_id or span not found, but we already logged to console above
pass
except Exception:
# Lock acquisition failed
logger.debug("Failed to acquire lock to add metric to span")
# Log to OpenTelemetry meter if available
if self.meter is None:
return
if isinstance(event.value, int):
counter = self._get_or_create_counter(event.metric, event.unit)
counter.add(event.value, attributes=event.attributes)
elif isinstance(event.value, float):
up_down_counter = self._get_or_create_up_down_counter(event.metric, event.unit)
up_down_counter.add(event.value, attributes=event.attributes)
def _get_or_create_up_down_counter(self, name: str, unit: str) -> metrics.UpDownCounter:
assert self.meter is not None
if name not in _GLOBAL_STORAGE["up_down_counters"]:
_GLOBAL_STORAGE["up_down_counters"][name] = self.meter.create_up_down_counter(
name=name,
unit=unit,
description=f"UpDownCounter for {name}",
)
return _GLOBAL_STORAGE["up_down_counters"][name]
def _log_structured(self, event: StructuredLogEvent, ttl_seconds: int) -> None:
with self._lock:
span_id = int(event.span_id, 16)
tracer = trace.get_tracer(__name__)
if event.attributes is None:
event.attributes = {}
event.attributes["__ttl__"] = ttl_seconds
# Extract these W3C trace context attributes so they are not written to
# underlying storage, as we just need them to propagate the trace context.
traceparent = event.attributes.pop("traceparent", None)
tracestate = event.attributes.pop("tracestate", None)
if traceparent:
# If we have a traceparent header value, we're not the root span.
for root_attribute in ROOT_SPAN_MARKERS:
event.attributes.pop(root_attribute, None)
if isinstance(event.payload, SpanStartPayload):
# Check if span already exists to prevent duplicates
if span_id in _GLOBAL_STORAGE["active_spans"]:
return
context = None
if event.payload.parent_span_id:
parent_span_id = int(event.payload.parent_span_id, 16)
parent_span = _GLOBAL_STORAGE["active_spans"].get(parent_span_id)
context = trace.set_span_in_context(parent_span)
elif traceparent:
carrier = {
"traceparent": traceparent,
"tracestate": tracestate,
}
context = TraceContextTextMapPropagator().extract(carrier=carrier)
span = tracer.start_span(
name=event.payload.name,
context=context,
attributes=event.attributes or {},
)
_GLOBAL_STORAGE["active_spans"][span_id] = span
elif isinstance(event.payload, SpanEndPayload):
span = _GLOBAL_STORAGE["active_spans"].get(span_id)
if span:
if event.attributes:
span.set_attributes(event.attributes)
status = (
trace.Status(status_code=trace.StatusCode.OK)
if event.payload.status == SpanStatus.OK
else trace.Status(status_code=trace.StatusCode.ERROR)
)
span.set_status(status)
span.end()
_GLOBAL_STORAGE["active_spans"].pop(span_id, None)
else:
raise ValueError(f"Unknown structured log event: {event}")
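For reference, a minimal sketch of the env-driven OTLP wiring the removed adapter relied on: with OTEL_EXPORTER_OTLP_ENDPOINT set, the HTTP exporters derive the /v1/traces and /v1/metrics URLs themselves, so only one endpoint needs configuring. The endpoint value below is a placeholder.

import os
from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor

os.environ.setdefault("OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4318")

provider = TracerProvider()
provider.add_span_processor(BatchSpanProcessor(OTLPSpanExporter()))  # exporter reads the env var
trace.set_tracer_provider(provider)

with trace.get_tracer(__name__).start_as_current_span("demo") as span:
    span.set_attribute("example", True)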
View file
@ -1,40 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from collections.abc import AsyncGenerator
from contextvars import ContextVar
def preserve_contexts_async_generator[T](
gen: AsyncGenerator[T, None], context_vars: list[ContextVar]
) -> AsyncGenerator[T, None]:
"""
Wraps an async generator to preserve context variables across iterations.
This is needed because we start a new asyncio event loop for each streaming request,
and we need to preserve the context across the event loop boundary.
"""
# Capture initial context values
initial_context_values = {context_var.name: context_var.get() for context_var in context_vars}
async def wrapper() -> AsyncGenerator[T, None]:
while True:
try:
# Restore context values before any await
for context_var in context_vars:
context_var.set(initial_context_values[context_var.name])
item = await gen.__anext__()
# Update our tracked values with any changes made during this iteration
for context_var in context_vars:
initial_context_values[context_var.name] = context_var.get()
yield item
except StopAsyncIteration:
break
return wrapper()
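A minimal usage sketch of the wrapper above; the request_id context variable and the generator are illustrative only:
import asyncio
from contextvars import ContextVar

# Hypothetical per-request context variable.
request_id_var: ContextVar[str] = ContextVar("request_id", default="unset")

async def stream_chunks():
    # Each chunk reads the context variable at yield time.
    for i in range(3):
        yield f"{request_id_var.get()}:chunk-{i}"

async def main():
    request_id_var.set("req-42")
    wrapped = preserve_contexts_async_generator(stream_chunks(), [request_id_var])
    async for chunk in wrapped:
        print(chunk)  # prints "req-42:chunk-0", "req-42:chunk-1", ...

asyncio.run(main())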

View file

@ -1,61 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
from . import NVIDIAConfig
from .utils import _is_nvidia_hosted
logger = get_logger(name=__name__, category="inference::nvidia")
class NVIDIAInferenceAdapter(OpenAIMixin):
config: NVIDIAConfig
"""
NVIDIA Inference Adapter for Llama Stack.
"""
# source: https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/support-matrix.html
embedding_model_metadata: dict[str, dict[str, int]] = {
"nvidia/llama-3.2-nv-embedqa-1b-v2": {"embedding_dimension": 2048, "context_length": 8192},
"nvidia/nv-embedqa-e5-v5": {"embedding_dimension": 512, "context_length": 1024},
"nvidia/nv-embedqa-mistral-7b-v2": {"embedding_dimension": 512, "context_length": 4096},
"snowflake/arctic-embed-l": {"embedding_dimension": 512, "context_length": 1024},
}
async def initialize(self) -> None:
logger.info(f"Initializing NVIDIAInferenceAdapter({self.config.url})...")
if _is_nvidia_hosted(self.config):
if not self.config.auth_credential:
raise RuntimeError(
"API key is required for hosted NVIDIA NIM. Either provide an API key or use a self-hosted NIM."
)
def get_api_key(self) -> str:
"""
Get the API key for OpenAI mixin.
:return: The NVIDIA API key
"""
if self.config.auth_credential:
return self.config.auth_credential.get_secret_value()
if not _is_nvidia_hosted(self.config):
return "NO KEY REQUIRED"
return None
def get_base_url(self) -> str:
"""
Get the base URL for OpenAI mixin.
:return: The NVIDIA API base URL
"""
return f"{self.config.url}/v1" if self.config.append_api_version else self.config.url
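As an illustration, the URL logic above reduces to the following; the endpoint value is an assumption, not a shipped default:
# Sketch of get_base_url(): append "/v1" only when append_api_version is set.
url = "https://nim.example.com"  # hypothetical self-hosted NIM endpoint
append_api_version = True
base_url = f"{url}/v1" if append_api_version else url
print(base_url)  # -> https://nim.example.com/v1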

View file

@ -31,7 +31,7 @@ dependencies = [
"jinja2>=3.1.6", "jinja2>=3.1.6",
"jsonschema", "jsonschema",
"llama-stack-client>=0.3.0", "llama-stack-client>=0.3.0",
"openai>=1.107", # for expires_after support "openai>=2.5.0",
"prompt-toolkit", "prompt-toolkit",
"python-dotenv", "python-dotenv",
"pyjwt[crypto]>=2.10.0", # Pull crypto to support RS256 for jwt. Requires 2.10.0+ for ssl_context support. "pyjwt[crypto]>=2.10.0", # Pull crypto to support RS256 for jwt. Requires 2.10.0+ for ssl_context support.
@ -67,17 +67,48 @@ dev = [
"pytest-cov", "pytest-cov",
"pytest-html", "pytest-html",
"pytest-json-report", "pytest-json-report",
"pytest-socket", # For blocking network access in unit tests "pytest-socket", # For blocking network access in unit tests
"nbval", # For notebook testing "nbval", # For notebook testing
"black", "black",
"ruff", "ruff",
"mypy",
"pre-commit",
"ruamel.yaml", # needed for openapi generator
]
# Type checking dependencies - includes type stubs and optional runtime dependencies
# needed for complete mypy coverage across all optional features
type_checking = [
"types-requests", "types-requests",
"types-setuptools", "types-setuptools",
"pre-commit", "types-jsonschema",
"ruamel.yaml", # needed for openapi generator "pandas-stubs",
"types-psutil",
"types-tqdm",
"boto3-stubs[s3]",
"streamlit",
"streamlit-option-menu",
"pandas",
"anthropic",
"databricks-sdk",
"fairscale",
"torchtune",
"trl",
"peft",
"datasets",
"together",
"nest-asyncio",
"pymongo",
"torchvision",
"sqlite-vec",
"faiss-cpu",
"lm-format-enforcer",
"mcp",
"ollama",
] ]
# These are the dependencies required for running unit tests. # These are the dependencies required for running unit tests.
unit = [ unit = [
"anthropic",
"databricks-sdk",
"sqlite-vec", "sqlite-vec",
"ollama", "ollama",
"aiosqlite", "aiosqlite",
@ -151,7 +182,7 @@ llama = "llama_stack.cli.llama:main"
install-wheel-from-presigned = "llama_stack.cli.scripts.run:install_wheel_from_presigned" install-wheel-from-presigned = "llama_stack.cli.scripts.run:install_wheel_from_presigned"
[tool.setuptools.packages.find] [tool.setuptools.packages.find]
where = ["."] where = ["src"]
include = ["llama_stack", "llama_stack.*"] include = ["llama_stack", "llama_stack.*"]
[[tool.uv.index]] [[tool.uv.index]]
@ -218,17 +249,17 @@ unfixable = [
# Ignore the following errors for the following files # Ignore the following errors for the following files
[tool.ruff.lint.per-file-ignores] [tool.ruff.lint.per-file-ignores]
"tests/**/*.py" = ["DTZ"] # Ignore datetime rules for tests "tests/**/*.py" = ["DTZ"] # Ignore datetime rules for tests
"llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py" = ["RUF001"] "src/llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py" = ["RUF001"]
"llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py" = [ "src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py" = [
"RUF001", "RUF001",
"PLE2515", "PLE2515",
] ]
"llama_stack/apis/**/__init__.py" = [ "src/llama_stack/apis/**/__init__.py" = [
"F403", "F403",
] # Using import * is acceptable (or at least tolerated) in an __init__.py of a package API ] # Using import * is acceptable (or at least tolerated) in an __init__.py of a package API
[tool.mypy] [tool.mypy]
mypy_path = ["llama_stack"] mypy_path = ["src"]
packages = ["llama_stack"] packages = ["llama_stack"]
plugins = ['pydantic.mypy'] plugins = ['pydantic.mypy']
disable_error_code = [] disable_error_code = []
@ -240,82 +271,91 @@ follow_imports = "silent"
# to exclude the entire directory. # to exclude the entire directory.
exclude = [ exclude = [
# As we fix more and more of these, we should remove them from the list # As we fix more and more of these, we should remove them from the list
"^llama_stack.core/build\\.py$", "^src/llama_stack/core/build\\.py$",
"^llama_stack.core/client\\.py$", "^src/llama_stack/core/client\\.py$",
"^llama_stack.core/request_headers\\.py$", "^src/llama_stack/core/request_headers\\.py$",
"^llama_stack.core/routers/", "^src/llama_stack/core/routers/",
"^llama_stack.core/routing_tables/", "^src/llama_stack/core/routing_tables/",
"^llama_stack.core/server/endpoints\\.py$", "^src/llama_stack/core/server/endpoints\\.py$",
"^llama_stack.core/server/server\\.py$", "^src/llama_stack/core/server/server\\.py$",
"^llama_stack.core/stack\\.py$", "^src/llama_stack/core/stack\\.py$",
"^llama_stack.core/store/registry\\.py$", "^src/llama_stack/core/store/registry\\.py$",
"^llama_stack.core/utils/exec\\.py$", "^src/llama_stack/core/utils/exec\\.py$",
"^llama_stack.core/utils/prompt_for_config\\.py$", "^src/llama_stack/core/utils/prompt_for_config\\.py$",
"^llama_stack/models/llama/llama3/interface\\.py$", "^src/llama_stack/models/llama/llama3/interface\\.py$",
"^llama_stack/models/llama/llama3/tokenizer\\.py$", "^src/llama_stack/models/llama/llama3/tokenizer\\.py$",
"^llama_stack/models/llama/llama3/tool_utils\\.py$", "^src/llama_stack/models/llama/llama3/tool_utils\\.py$",
"^llama_stack/providers/inline/agents/meta_reference/", "^src/llama_stack/providers/inline/datasetio/localfs/",
"^llama_stack/providers/inline/datasetio/localfs/", "^src/llama_stack/providers/inline/eval/meta_reference/eval\\.py$",
"^llama_stack/providers/inline/eval/meta_reference/eval\\.py$", "^src/llama_stack/providers/inline/inference/meta_reference/inference\\.py$",
"^llama_stack/providers/inline/inference/meta_reference/inference\\.py$", "^src/llama_stack/models/llama/llama3/generation\\.py$",
"^llama_stack/models/llama/llama3/generation\\.py$", "^src/llama_stack/models/llama/llama3/multimodal/model\\.py$",
"^llama_stack/models/llama/llama3/multimodal/model\\.py$", "^src/llama_stack/models/llama/llama4/",
"^llama_stack/models/llama/llama4/", "^src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers\\.py$",
"^llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers\\.py$", "^src/llama_stack/providers/inline/post_training/common/validator\\.py$",
"^llama_stack/providers/inline/post_training/common/validator\\.py$", "^src/llama_stack/providers/inline/safety/code_scanner/",
"^llama_stack/providers/inline/safety/code_scanner/", "^src/llama_stack/providers/inline/safety/llama_guard/",
"^llama_stack/providers/inline/safety/llama_guard/", "^src/llama_stack/providers/inline/scoring/basic/",
"^llama_stack/providers/inline/scoring/basic/", "^src/llama_stack/providers/inline/scoring/braintrust/",
"^llama_stack/providers/inline/scoring/braintrust/", "^src/llama_stack/providers/inline/scoring/llm_as_judge/",
"^llama_stack/providers/inline/scoring/llm_as_judge/", "^src/llama_stack/providers/remote/agents/sample/",
"^llama_stack/providers/remote/agents/sample/", "^src/llama_stack/providers/remote/datasetio/huggingface/",
"^llama_stack/providers/remote/datasetio/huggingface/", "^src/llama_stack/providers/remote/datasetio/nvidia/",
"^llama_stack/providers/remote/datasetio/nvidia/", "^src/llama_stack/providers/remote/inference/bedrock/",
"^llama_stack/providers/remote/inference/bedrock/", "^src/llama_stack/providers/remote/inference/nvidia/",
"^llama_stack/providers/remote/inference/nvidia/", "^src/llama_stack/providers/remote/inference/passthrough/",
"^llama_stack/providers/remote/inference/passthrough/", "^src/llama_stack/providers/remote/inference/runpod/",
"^llama_stack/providers/remote/inference/runpod/", "^src/llama_stack/providers/remote/inference/tgi/",
"^llama_stack/providers/remote/inference/tgi/", "^src/llama_stack/providers/remote/inference/watsonx/",
"^llama_stack/providers/remote/inference/watsonx/", "^src/llama_stack/providers/remote/safety/bedrock/",
"^llama_stack/providers/remote/safety/bedrock/", "^src/llama_stack/providers/remote/safety/nvidia/",
"^llama_stack/providers/remote/safety/nvidia/", "^src/llama_stack/providers/remote/safety/sambanova/",
"^llama_stack/providers/remote/safety/sambanova/", "^src/llama_stack/providers/remote/safety/sample/",
"^llama_stack/providers/remote/safety/sample/", "^src/llama_stack/providers/remote/tool_runtime/bing_search/",
"^llama_stack/providers/remote/tool_runtime/bing_search/", "^src/llama_stack/providers/remote/tool_runtime/brave_search/",
"^llama_stack/providers/remote/tool_runtime/brave_search/", "^src/llama_stack/providers/remote/tool_runtime/model_context_protocol/",
"^llama_stack/providers/remote/tool_runtime/model_context_protocol/", "^src/llama_stack/providers/remote/tool_runtime/tavily_search/",
"^llama_stack/providers/remote/tool_runtime/tavily_search/", "^src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/",
"^llama_stack/providers/remote/tool_runtime/wolfram_alpha/", "^src/llama_stack/providers/remote/post_training/nvidia/",
"^llama_stack/providers/remote/post_training/nvidia/", "^src/llama_stack/providers/remote/vector_io/chroma/",
"^llama_stack/providers/remote/vector_io/chroma/", "^src/llama_stack/providers/remote/vector_io/milvus/",
"^llama_stack/providers/remote/vector_io/milvus/", "^src/llama_stack/providers/remote/vector_io/pgvector/",
"^llama_stack/providers/remote/vector_io/pgvector/", "^src/llama_stack/providers/remote/vector_io/qdrant/",
"^llama_stack/providers/remote/vector_io/qdrant/", "^src/llama_stack/providers/remote/vector_io/sample/",
"^llama_stack/providers/remote/vector_io/sample/", "^src/llama_stack/providers/remote/vector_io/weaviate/",
"^llama_stack/providers/remote/vector_io/weaviate/", "^src/llama_stack/providers/utils/bedrock/client\\.py$",
"^llama_stack/providers/utils/bedrock/client\\.py$", "^src/llama_stack/providers/utils/bedrock/refreshable_boto_session\\.py$",
"^llama_stack/providers/utils/bedrock/refreshable_boto_session\\.py$", "^src/llama_stack/providers/utils/inference/embedding_mixin\\.py$",
"^llama_stack/providers/utils/inference/embedding_mixin\\.py$", "^src/llama_stack/providers/utils/inference/litellm_openai_mixin\\.py$",
"^llama_stack/providers/utils/inference/litellm_openai_mixin\\.py$", "^src/llama_stack/providers/utils/inference/model_registry\\.py$",
"^llama_stack/providers/utils/inference/model_registry\\.py$", "^src/llama_stack/providers/utils/inference/openai_compat\\.py$",
"^llama_stack/providers/utils/inference/openai_compat\\.py$", "^src/llama_stack/providers/utils/inference/prompt_adapter\\.py$",
"^llama_stack/providers/utils/inference/prompt_adapter\\.py$", "^src/llama_stack/providers/utils/kvstore/kvstore\\.py$",
"^llama_stack/providers/utils/kvstore/kvstore\\.py$", "^src/llama_stack/providers/utils/kvstore/postgres/postgres\\.py$",
"^llama_stack/providers/utils/kvstore/postgres/postgres\\.py$", "^src/llama_stack/providers/utils/kvstore/redis/redis\\.py$",
"^llama_stack/providers/utils/kvstore/redis/redis\\.py$", "^src/llama_stack/providers/utils/memory/vector_store\\.py$",
"^llama_stack/providers/utils/memory/vector_store\\.py$", "^src/llama_stack/providers/utils/scoring/aggregation_utils\\.py$",
"^llama_stack/providers/utils/scoring/aggregation_utils\\.py$", "^src/llama_stack/providers/utils/scoring/base_scoring_fn\\.py$",
"^llama_stack/providers/utils/scoring/base_scoring_fn\\.py$", "^src/llama_stack/providers/utils/telemetry/dataset_mixin\\.py$",
"^llama_stack/providers/utils/telemetry/dataset_mixin\\.py$", "^src/llama_stack/providers/utils/telemetry/trace_protocol\\.py$",
"^llama_stack/providers/utils/telemetry/trace_protocol\\.py$", "^src/llama_stack/providers/utils/telemetry/tracing\\.py$",
"^llama_stack/providers/utils/telemetry/tracing\\.py$", "^src/llama_stack/strong_typing/auxiliary\\.py$",
"^llama_stack/strong_typing/auxiliary\\.py$", "^src/llama_stack/distributions/template\\.py$",
"^llama_stack/distributions/template\\.py$",
] ]
[[tool.mypy.overrides]] [[tool.mypy.overrides]]
# packages that lack typing annotations, do not have stubs, or are unavailable. # packages that lack typing annotations, do not have stubs, or are unavailable.
module = ["yaml", "fire"] module = [
"yaml",
"fire",
"torchtune.*",
"fairscale.*",
"torchvision.*",
"datasets",
"nest_asyncio",
"streamlit_option_menu",
"lmformatenforcer.*",
]
ignore_missing_imports = true ignore_missing_imports = true
[tool.pydantic-mypy] [tool.pydantic-mypy]

View file

@ -16,7 +16,7 @@ if (( BASH_VERSINFO[0] < 4 )); then
exit 1 exit 1
fi fi
PACKAGE_DIR="${1:-llama_stack}" PACKAGE_DIR="${1:-src/llama_stack}"
if [ ! -d "$PACKAGE_DIR" ]; then if [ ! -d "$PACKAGE_DIR" ]; then
echo "ERROR: Package directory '$PACKAGE_DIR' does not exist" echo "ERROR: Package directory '$PACKAGE_DIR' does not exist"

View file

@ -55,7 +55,7 @@ def process_distro(distro_dir: Path, progress, change_tracker: ChangedPathTracke
if template_func := getattr(module, "get_distribution_template", None): if template_func := getattr(module, "get_distribution_template", None):
distro = template_func() distro = template_func()
yaml_output_dir = REPO_ROOT / "llama_stack" / "distributions" / distro.name yaml_output_dir = REPO_ROOT / "src" / "llama_stack" / "distributions" / distro.name
doc_output_dir = REPO_ROOT / "docs/docs/distributions" / f"{distro.distro_type}_distro" doc_output_dir = REPO_ROOT / "docs/docs/distributions" / f"{distro.distro_type}_distro"
change_tracker.add_paths(yaml_output_dir, doc_output_dir) change_tracker.add_paths(yaml_output_dir, doc_output_dir)
distro.save_distribution( distro.save_distribution(
@ -93,7 +93,7 @@ def pre_import_distros(distro_dirs: list[Path]) -> None:
def main(): def main():
distros_dir = REPO_ROOT / "llama_stack" / "distributions" distros_dir = REPO_ROOT / "src" / "llama_stack" / "distributions"
change_tracker = ChangedPathTracker() change_tracker = ChangedPathTracker()
with Progress( with Progress(

View file

@ -30,8 +30,10 @@ materialize_telemetry_configs() {
local otel_cfg="${dest}/otel-collector-config.yaml" local otel_cfg="${dest}/otel-collector-config.yaml"
local prom_cfg="${dest}/prometheus.yml" local prom_cfg="${dest}/prometheus.yml"
local graf_cfg="${dest}/grafana-datasources.yaml" local graf_cfg="${dest}/grafana-datasources.yaml"
local graf_dash_cfg="${dest}/grafana-dashboards.yaml"
local dash_json="${dest}/llama-stack-dashboard.json"
for asset in "$otel_cfg" "$prom_cfg" "$graf_cfg"; do for asset in "$otel_cfg" "$prom_cfg" "$graf_cfg" "$graf_dash_cfg" "$dash_json"; do
if [ -e "$asset" ]; then if [ -e "$asset" ]; then
die "Telemetry asset ${asset} already exists; refusing to overwrite" die "Telemetry asset ${asset} already exists; refusing to overwrite"
fi fi
@ -103,6 +105,7 @@ datasources:
type: prometheus type: prometheus
access: proxy access: proxy
url: http://prometheus:9090 url: http://prometheus:9090
uid: prometheus
isDefault: true isDefault: true
editable: true editable: true
@ -112,6 +115,224 @@ datasources:
url: http://jaeger:16686 url: http://jaeger:16686
editable: true editable: true
EOF EOF
cat <<'EOF' > "$graf_dash_cfg"
apiVersion: 1
providers:
- name: 'Llama Stack'
orgId: 1
folder: ''
type: file
disableDeletion: false
updateIntervalSeconds: 10
allowUiUpdates: true
options:
path: /etc/grafana/provisioning/dashboards
EOF
# Copy the dashboard JSON inline to avoid line-length issues
cat > "$dash_json" <<'DASHBOARD_JSON'
{
"annotations": {
"list": []
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": null,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"custom": {
"drawStyle": "line",
"lineInterpolation": "linear",
"showPoints": "auto",
"fillOpacity": 10
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [{"color": "green", "value": null}]
}
}
},
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 0},
"id": 1,
"options": {
"legend": {"calcs": [], "displayMode": "table", "placement": "bottom", "showLegend": true},
"tooltip": {"mode": "multi", "sort": "none"}
},
"targets": [
{
"datasource": {"type": "prometheus", "uid": "prometheus"},
"expr": "llama_stack_completion_tokens_total",
"legendFormat": "{{model_id}} ({{provider_id}})",
"refId": "A"
}
],
"title": "Completion Tokens",
"type": "timeseries"
},
{
"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"},
"fieldConfig": {
"defaults": {
"custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10},
"mappings": [],
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}
}
},
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 0},
"id": 2,
"options": {
"legend": {"calcs": [], "displayMode": "table", "placement": "bottom", "showLegend": true},
"tooltip": {"mode": "multi", "sort": "none"}
},
"targets": [
{"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "llama_stack_prompt_tokens_total", "legendFormat": "Prompt - {{model_id}}", "refId": "A"},
{"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "llama_stack_tokens_total", "legendFormat": "Total - {{model_id}}", "refId": "B"}
],
"title": "Prompt & Total Tokens",
"type": "timeseries"
},
{
"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"},
"fieldConfig": {
"defaults": {
"custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10},
"mappings": [],
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
"unit": "ms"
}
},
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 8},
"id": 3,
"options": {
"legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true},
"tooltip": {"mode": "multi", "sort": "none"}
},
"targets": [
{"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "histogram_quantile(0.95, rate(llama_stack_http_server_duration_milliseconds_bucket[5m]))", "legendFormat": "p95", "refId": "A"},
{"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "histogram_quantile(0.99, rate(llama_stack_http_server_duration_milliseconds_bucket[5m]))", "legendFormat": "p99", "refId": "B"}
],
"title": "HTTP Request Duration (p95, p99)",
"type": "timeseries"
},
{
"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"},
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}
}
},
"gridPos": {"h": 8, "w": 6, "x": 12, "y": 8},
"id": 4,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false},
"textMode": "auto"
},
"targets": [
{"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "sum(llama_stack_http_server_duration_milliseconds_count)", "refId": "A"}
],
"title": "Total Requests",
"type": "stat"
},
{
"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"},
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}
}
},
"gridPos": {"h": 8, "w": 6, "x": 18, "y": 8},
"id": 5,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false},
"textMode": "auto"
},
"targets": [
{"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "sum(llama_stack_http_server_active_requests)", "refId": "A"}
],
"title": "Active Requests",
"type": "stat"
},
{
"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"},
"fieldConfig": {
"defaults": {
"custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10},
"mappings": [],
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
"unit": "reqps"
}
},
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 16},
"id": 6,
"options": {
"legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true},
"tooltip": {"mode": "multi", "sort": "none"}
},
"targets": [
{"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "rate(llama_stack_http_server_duration_milliseconds_count[5m])", "legendFormat": "{{http_target}} - {{http_status_code}}", "refId": "A"}
],
"title": "Request Rate",
"type": "timeseries"
},
{
"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"},
"fieldConfig": {
"defaults": {
"custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10},
"mappings": [],
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
"unit": "Bps"
}
},
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 16},
"id": 7,
"options": {
"legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true},
"tooltip": {"mode": "multi", "sort": "none"}
},
"targets": [
{"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "rate(llama_stack_http_server_request_size_bytes_sum[5m])", "legendFormat": "Request", "refId": "A"},
{"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "rate(llama_stack_http_server_response_size_bytes_sum[5m])", "legendFormat": "Response", "refId": "B"}
],
"title": "Request/Response Sizes",
"type": "timeseries"
}
],
"refresh": "5s",
"schemaVersion": 38,
"tags": ["llama-stack"],
"templating": {"list": []},
"time": {"from": "now-15m", "to": "now"},
"timepicker": {},
"timezone": "browser",
"title": "Llama Stack Metrics",
"uid": "llama-stack-metrics",
"version": 0,
"weekStart": ""
}
DASHBOARD_JSON
} }
# Cleanup function to remove temporary files # Cleanup function to remove temporary files
@ -372,6 +593,8 @@ if [ "$WITH_TELEMETRY" = true ]; then
-e GF_SECURITY_ADMIN_PASSWORD=admin \ -e GF_SECURITY_ADMIN_PASSWORD=admin \
-e GF_USERS_ALLOW_SIGN_UP=false \ -e GF_USERS_ALLOW_SIGN_UP=false \
-v "${TELEMETRY_ASSETS_DIR}/grafana-datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:Z" \ -v "${TELEMETRY_ASSETS_DIR}/grafana-datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:Z" \
-v "${TELEMETRY_ASSETS_DIR}/grafana-dashboards.yaml:/etc/grafana/provisioning/dashboards/dashboards.yaml:Z" \
-v "${TELEMETRY_ASSETS_DIR}/llama-stack-dashboard.json:/etc/grafana/provisioning/dashboards/llama-stack-dashboard.json:Z" \
docker.io/grafana/grafana:11.0.0 > /dev/null 2>&1; then docker.io/grafana/grafana:11.0.0 > /dev/null 2>&1; then
die "Grafana startup failed" die "Grafana startup failed"
fi fi

View file

@ -208,6 +208,15 @@ if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then
echo "=== Starting Llama Stack Server ===" echo "=== Starting Llama Stack Server ==="
export LLAMA_STACK_LOG_WIDTH=120 export LLAMA_STACK_LOG_WIDTH=120
# Configure telemetry collector for server mode
# Use a fixed port for the OTEL collector so the server can connect to it
COLLECTOR_PORT=4317
export LLAMA_STACK_TEST_COLLECTOR_PORT="${COLLECTOR_PORT}"
export OTEL_EXPORTER_OTLP_ENDPOINT="http://127.0.0.1:${COLLECTOR_PORT}"
export OTEL_EXPORTER_OTLP_PROTOCOL="http/protobuf"
export OTEL_BSP_SCHEDULE_DELAY="200"
export OTEL_BSP_EXPORT_TIMEOUT="2000"
# remove "server:" from STACK_CONFIG # remove "server:" from STACK_CONFIG
stack_config=$(echo "$STACK_CONFIG" | sed 's/^server://') stack_config=$(echo "$STACK_CONFIG" | sed 's/^server://')
nohup llama stack run $stack_config > server.log 2>&1 & nohup llama stack run $stack_config > server.log 2>&1 &
@ -284,10 +293,15 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
docker stop "$container_name" 2>/dev/null || true docker stop "$container_name" 2>/dev/null || true
docker rm "$container_name" 2>/dev/null || true docker rm "$container_name" 2>/dev/null || true
# Configure telemetry collector port shared between host and container
COLLECTOR_PORT=4317
export LLAMA_STACK_TEST_COLLECTOR_PORT="${COLLECTOR_PORT}"
# Build environment variables for docker run # Build environment variables for docker run
DOCKER_ENV_VARS="" DOCKER_ENV_VARS=""
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_INFERENCE_MODE=$INFERENCE_MODE" DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_INFERENCE_MODE=$INFERENCE_MODE"
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_STACK_CONFIG_TYPE=server" DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_STACK_CONFIG_TYPE=server"
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:${COLLECTOR_PORT}"
# Pass through API keys if they exist # Pass through API keys if they exist
[ -n "${TOGETHER_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e TOGETHER_API_KEY=$TOGETHER_API_KEY" [ -n "${TOGETHER_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e TOGETHER_API_KEY=$TOGETHER_API_KEY"
@ -308,8 +322,20 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
fi fi
echo "Using image: $IMAGE_NAME" echo "Using image: $IMAGE_NAME"
docker run -d --network host --name "$container_name" \ # On macOS/Darwin, --network host doesn't work as expected due to Docker running in a VM
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ # Use regular port mapping instead
NETWORK_MODE=""
PORT_MAPPINGS=""
if [[ "$(uname)" != "Darwin" ]] && [[ "$(uname)" != *"MINGW"* ]]; then
NETWORK_MODE="--network host"
else
# On non-Linux (macOS, Windows), need explicit port mappings for both app and telemetry
PORT_MAPPINGS="-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT -p $COLLECTOR_PORT:$COLLECTOR_PORT"
echo "Using bridge networking with port mapping (non-Linux)"
fi
docker run -d $NETWORK_MODE --name "$container_name" \
$PORT_MAPPINGS \
$DOCKER_ENV_VARS \ $DOCKER_ENV_VARS \
"$IMAGE_NAME" \ "$IMAGE_NAME" \
--port $LLAMA_STACK_PORT --port $LLAMA_STACK_PORT

View file

@ -6,7 +6,7 @@
# the root directory of this source tree. # the root directory of this source tree.
set -e set -e
cd llama_stack/ui cd src/llama_stack/ui
if [ ! -d node_modules ] || [ ! -x node_modules/.bin/prettier ] || [ ! -x node_modules/.bin/eslint ]; then if [ ! -d node_modules ] || [ ! -x node_modules/.bin/prettier ] || [ ! -x node_modules/.bin/eslint ]; then
echo "UI dependencies not installed, skipping prettier/linter check" echo "UI dependencies not installed, skipping prettier/linter check"

View file

@ -0,0 +1,12 @@
apiVersion: 1
providers:
- name: 'Llama Stack'
orgId: 1
folder: ''
type: file
disableDeletion: false
updateIntervalSeconds: 10
allowUiUpdates: true
options:
path: /etc/grafana/provisioning/dashboards

View file

@ -5,6 +5,7 @@ datasources:
type: prometheus type: prometheus
access: proxy access: proxy
url: http://prometheus:9090 url: http://prometheus:9090
uid: prometheus
isDefault: true isDefault: true
editable: true editable: true

View file

@ -0,0 +1,457 @@
{
"annotations": {
"list": []
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": null,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"custom": {
"drawStyle": "line",
"lineInterpolation": "linear",
"showPoints": "auto",
"fillOpacity": 10
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
}
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 0
},
"id": 1,
"options": {
"legend": {
"calcs": [],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"expr": "llama_stack_completion_tokens_total",
"legendFormat": "{{model_id}} ({{provider_id}})",
"refId": "A"
}
],
"title": "Completion Tokens",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"custom": {
"drawStyle": "line",
"lineInterpolation": "linear",
"showPoints": "auto",
"fillOpacity": 10
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
}
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 0
},
"id": 2,
"options": {
"legend": {
"calcs": [],
"displayMode": "table",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"expr": "llama_stack_prompt_tokens_total",
"legendFormat": "Prompt - {{model_id}}",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"expr": "llama_stack_tokens_total",
"legendFormat": "Total - {{model_id}}",
"refId": "B"
}
],
"title": "Prompt & Total Tokens",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"custom": {
"drawStyle": "line",
"lineInterpolation": "linear",
"showPoints": "auto",
"fillOpacity": 10
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "ms"
}
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 8
},
"id": 3,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"expr": "histogram_quantile(0.95, rate(llama_stack_http_server_duration_milliseconds_bucket[5m]))",
"legendFormat": "p95",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"expr": "histogram_quantile(0.99, rate(llama_stack_http_server_duration_milliseconds_bucket[5m]))",
"legendFormat": "p99",
"refId": "B"
}
],
"title": "HTTP Request Duration (p95, p99)",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
}
},
"gridPos": {
"h": 8,
"w": 6,
"x": 12,
"y": 8
},
"id": 4,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"expr": "sum(llama_stack_http_server_duration_milliseconds_count)",
"refId": "A"
}
],
"title": "Total Requests",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
}
}
},
"gridPos": {
"h": 8,
"w": 6,
"x": 18,
"y": 8
},
"id": 5,
"options": {
"colorMode": "value",
"graphMode": "none",
"justifyMode": "auto",
"orientation": "auto",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "auto"
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"expr": "sum(llama_stack_http_server_active_requests)",
"refId": "A"
}
],
"title": "Active Requests",
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"custom": {
"drawStyle": "line",
"lineInterpolation": "linear",
"showPoints": "auto",
"fillOpacity": 10
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "reqps"
}
},
"gridPos": {
"h": 8,
"w": 12,
"x": 0,
"y": 16
},
"id": 6,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"expr": "rate(llama_stack_http_server_duration_milliseconds_count[5m])",
"legendFormat": "{{http_target}} - {{http_status_code}}",
"refId": "A"
}
],
"title": "Request Rate",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"custom": {
"drawStyle": "line",
"lineInterpolation": "linear",
"showPoints": "auto",
"fillOpacity": 10
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "Bps"
}
},
"gridPos": {
"h": 8,
"w": 12,
"x": 12,
"y": 16
},
"id": 7,
"options": {
"legend": {
"calcs": [],
"displayMode": "list",
"placement": "bottom",
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"expr": "rate(llama_stack_http_server_request_size_bytes_sum[5m])",
"legendFormat": "Request",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"expr": "rate(llama_stack_http_server_response_size_bytes_sum[5m])",
"legendFormat": "Response",
"refId": "B"
}
],
"title": "Request/Response Sizes",
"type": "timeseries"
}
],
"refresh": "5s",
"schemaVersion": 38,
"tags": [
"llama-stack"
],
"templating": {
"list": []
},
"time": {
"from": "now-15m",
"to": "now"
},
"timepicker": {},
"timezone": "browser",
"title": "Llama Stack Metrics",
"uid": "llama-stack-metrics",
"version": 0,
"weekStart": ""
}

View file

@ -135,6 +135,8 @@ $CONTAINER_RUNTIME run -d --name grafana \
-e GF_SECURITY_ADMIN_PASSWORD=admin \ -e GF_SECURITY_ADMIN_PASSWORD=admin \
-e GF_USERS_ALLOW_SIGN_UP=false \ -e GF_USERS_ALLOW_SIGN_UP=false \
-v "$SCRIPT_DIR/grafana-datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:Z" \ -v "$SCRIPT_DIR/grafana-datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:Z" \
-v "$SCRIPT_DIR/grafana-dashboards.yaml:/etc/grafana/provisioning/dashboards/dashboards.yaml:Z" \
-v "$SCRIPT_DIR/llama-stack-dashboard.json:/etc/grafana/provisioning/dashboards/llama-stack-dashboard.json:Z" \
docker.io/grafana/grafana:11.0.0 docker.io/grafana/grafana:11.0.0
# Wait for services to start # Wait for services to start

View file

@ -27,4 +27,4 @@ fi
# Run unit tests with coverage # Run unit tests with coverage
uv run --python "$PYTHON_VERSION" --with-editable . --group unit \ uv run --python "$PYTHON_VERSION" --with-editable . --group unit \
coverage run --source=llama_stack -m pytest -s -v tests/unit/ "$@" coverage run --source=src/llama_stack -m pytest -s -v tests/unit/ "$@"

View file

@ -38,6 +38,7 @@ from .openai_responses import (
OpenAIResponseInputTool, OpenAIResponseInputTool,
OpenAIResponseObject, OpenAIResponseObject,
OpenAIResponseObjectStream, OpenAIResponseObjectStream,
OpenAIResponsePrompt,
OpenAIResponseText, OpenAIResponseText,
) )
@ -149,13 +150,13 @@ class ShieldCallStep(StepCommon):
class MemoryRetrievalStep(StepCommon): class MemoryRetrievalStep(StepCommon):
"""A memory retrieval step in an agent turn. """A memory retrieval step in an agent turn.
:param vector_db_ids: The IDs of the vector databases to retrieve context from. :param vector_store_ids: The IDs of the vector databases to retrieve context from.
:param inserted_context: The context retrieved from the vector databases. :param inserted_context: The context retrieved from the vector databases.
""" """
step_type: Literal[StepType.memory_retrieval] = StepType.memory_retrieval step_type: Literal[StepType.memory_retrieval] = StepType.memory_retrieval
# TODO: should this be List[str]? # TODO: should this be List[str]?
vector_db_ids: str vector_store_ids: str
inserted_context: InterleavedContent inserted_context: InterleavedContent
@ -810,6 +811,7 @@ class Agents(Protocol):
self, self,
input: str | list[OpenAIResponseInput], input: str | list[OpenAIResponseInput],
model: str, model: str,
prompt: OpenAIResponsePrompt | None = None,
instructions: str | None = None, instructions: str | None = None,
previous_response_id: str | None = None, previous_response_id: str | None = None,
conversation: str | None = None, conversation: str | None = None,
@ -831,6 +833,7 @@ class Agents(Protocol):
:param input: Input message(s) to create the response. :param input: Input message(s) to create the response.
:param model: The underlying LLM used for completions. :param model: The underlying LLM used for completions.
:param prompt: (Optional) Prompt object with ID, version, and variables.
:param previous_response_id: (Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses. :param previous_response_id: (Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses.
:param conversation: (Optional) The ID of a conversation to add the response to. Must begin with 'conv_'. Input and output messages will be automatically added to the conversation. :param conversation: (Optional) The ID of a conversation to add the response to. Must begin with 'conv_'. Input and output messages will be automatically added to the conversation.
:param include: (Optional) Additional fields to include in the response. :param include: (Optional) Additional fields to include in the response.

View file

@ -4,9 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in # This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree. # the root directory of this source tree.
from collections.abc import Sequence
from typing import Annotated, Any, Literal from typing import Annotated, Any, Literal
from pydantic import BaseModel, Field from pydantic import BaseModel, Field, model_validator
from typing_extensions import TypedDict from typing_extensions import TypedDict
from llama_stack.apis.vector_io import SearchRankingOptions as FileSearchRankingOptions from llama_stack.apis.vector_io import SearchRankingOptions as FileSearchRankingOptions
@ -46,23 +47,66 @@ class OpenAIResponseInputMessageContentImage(BaseModel):
:param detail: Level of detail for image processing, can be "low", "high", or "auto" :param detail: Level of detail for image processing, can be "low", "high", or "auto"
:param type: Content type identifier, always "input_image" :param type: Content type identifier, always "input_image"
:param file_id: (Optional) The ID of the file to be sent to the model.
:param image_url: (Optional) URL of the image content :param image_url: (Optional) URL of the image content
""" """
detail: Literal["low"] | Literal["high"] | Literal["auto"] = "auto" detail: Literal["low"] | Literal["high"] | Literal["auto"] = "auto"
type: Literal["input_image"] = "input_image" type: Literal["input_image"] = "input_image"
# TODO: handle file_id file_id: str | None = None
image_url: str | None = None image_url: str | None = None
# TODO: handle file content types @json_schema_type
class OpenAIResponseInputMessageContentFile(BaseModel):
"""File content for input messages in OpenAI response format.
:param type: The type of the input item. Always `input_file`.
:param file_data: The data of the file to be sent to the model.
:param file_id: (Optional) The ID of the file to be sent to the model.
:param file_url: The URL of the file to be sent to the model.
:param filename: The name of the file to be sent to the model.
"""
type: Literal["input_file"] = "input_file"
file_data: str | None = None
file_id: str | None = None
file_url: str | None = None
filename: str | None = None
@model_validator(mode="after")
def validate_file_source(self) -> "OpenAIResponseInputMessageContentFile":
if not any([self.file_data, self.file_id, self.file_url, self.filename]):
raise ValueError(
"At least one of 'file_data', 'file_id', 'file_url', or 'filename' must be provided for file content"
)
return self
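A minimal sketch of how this validator behaves (pydantic wraps the ValueError in a ValidationError; the file id is a made-up value):
from pydantic import ValidationError

# Valid: at least one file source is supplied.
ok = OpenAIResponseInputMessageContentFile(file_id="file-abc123")

# Invalid: no file_data, file_id, file_url, or filename at all.
try:
    OpenAIResponseInputMessageContentFile()
except ValidationError as err:
    print(err.errors()[0]["msg"])  # mentions that at least one source must be provided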
OpenAIResponseInputMessageContent = Annotated[ OpenAIResponseInputMessageContent = Annotated[
OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage, OpenAIResponseInputMessageContentText
| OpenAIResponseInputMessageContentImage
| OpenAIResponseInputMessageContentFile,
Field(discriminator="type"), Field(discriminator="type"),
] ]
register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent") register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent")
@json_schema_type
class OpenAIResponsePrompt(BaseModel):
"""OpenAI compatible Prompt object that is used in OpenAI responses.
:param id: Unique identifier of the prompt template
:param variables: Dictionary mapping variable names to OpenAIResponseInputMessageContent structures for template substitution. Substitution values can be strings or other Response input types
such as images or files.
:param version: Version number of the prompt to use (defaults to latest if not specified)
"""
id: str
variables: dict[str, OpenAIResponseInputMessageContent] | None = None
version: str | None = None
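An illustrative construction of the prompt reference defined above; the id, version, and variable name are hypothetical:
prompt = OpenAIResponsePrompt(
    id="pmpt_weekly_summary",  # hypothetical prompt template id
    version="2",
    variables={
        # Any OpenAIResponseInputMessageContent works as a substitution value.
        "attachment": OpenAIResponseInputMessageContentFile(file_id="file-abc123"),
    },
)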
@json_schema_type @json_schema_type
class OpenAIResponseAnnotationFileCitation(BaseModel): class OpenAIResponseAnnotationFileCitation(BaseModel):
"""File citation annotation for referencing specific files in response content. """File citation annotation for referencing specific files in response content.
@ -159,7 +203,7 @@ class OpenAIResponseMessage(BaseModel):
scenarios. scenarios.
""" """
content: str | list[OpenAIResponseInputMessageContent] | list[OpenAIResponseOutputMessageContent] content: str | Sequence[OpenAIResponseInputMessageContent] | Sequence[OpenAIResponseOutputMessageContent]
role: Literal["system"] | Literal["developer"] | Literal["user"] | Literal["assistant"] role: Literal["system"] | Literal["developer"] | Literal["user"] | Literal["assistant"]
type: Literal["message"] = "message" type: Literal["message"] = "message"
@ -211,10 +255,10 @@ class OpenAIResponseOutputMessageFileSearchToolCall(BaseModel):
""" """
id: str id: str
queries: list[str] queries: Sequence[str]
status: str status: str
type: Literal["file_search_call"] = "file_search_call" type: Literal["file_search_call"] = "file_search_call"
results: list[OpenAIResponseOutputMessageFileSearchToolCallResults] | None = None results: Sequence[OpenAIResponseOutputMessageFileSearchToolCallResults] | None = None
@json_schema_type @json_schema_type
@ -538,6 +582,7 @@ class OpenAIResponseObject(BaseModel):
:param output: List of generated output items (messages, tool calls, etc.) :param output: List of generated output items (messages, tool calls, etc.)
:param parallel_tool_calls: Whether tool calls can be executed in parallel :param parallel_tool_calls: Whether tool calls can be executed in parallel
:param previous_response_id: (Optional) ID of the previous response in a conversation :param previous_response_id: (Optional) ID of the previous response in a conversation
:param prompt: (Optional) Reference to a prompt template and its variables.
:param status: Current status of the response generation :param status: Current status of the response generation
:param temperature: (Optional) Sampling temperature used for generation :param temperature: (Optional) Sampling temperature used for generation
:param text: Text formatting configuration for the response :param text: Text formatting configuration for the response
@ -553,16 +598,17 @@ class OpenAIResponseObject(BaseModel):
id: str id: str
model: str model: str
object: Literal["response"] = "response" object: Literal["response"] = "response"
output: list[OpenAIResponseOutput] output: Sequence[OpenAIResponseOutput]
parallel_tool_calls: bool = False parallel_tool_calls: bool = False
previous_response_id: str | None = None previous_response_id: str | None = None
prompt: OpenAIResponsePrompt | None = None
status: str status: str
temperature: float | None = None temperature: float | None = None
# Default to text format to avoid breaking the loading of old responses # Default to text format to avoid breaking the loading of old responses
# before the field was added. New responses will have this set always. # before the field was added. New responses will have this set always.
text: OpenAIResponseText = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text")) text: OpenAIResponseText = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text"))
top_p: float | None = None top_p: float | None = None
tools: list[OpenAIResponseTool] | None = None tools: Sequence[OpenAIResponseTool] | None = None
truncation: str | None = None truncation: str | None = None
usage: OpenAIResponseUsage | None = None usage: OpenAIResponseUsage | None = None
instructions: str | None = None instructions: str | None = None
@ -1254,14 +1300,9 @@ class OpenAIResponseInputFunctionToolCallOutput(BaseModel):
OpenAIResponseInput = Annotated[ OpenAIResponseInput = Annotated[
# Responses API allows output messages to be passed in as input # Responses API allows output messages to be passed in as input
OpenAIResponseOutputMessageWebSearchToolCall OpenAIResponseOutput
| OpenAIResponseOutputMessageFileSearchToolCall
| OpenAIResponseOutputMessageFunctionToolCall
| OpenAIResponseInputFunctionToolCallOutput | OpenAIResponseInputFunctionToolCallOutput
| OpenAIResponseMCPApprovalRequest
| OpenAIResponseMCPApprovalResponse | OpenAIResponseMCPApprovalResponse
| OpenAIResponseOutputMessageMCPCall
| OpenAIResponseOutputMessageMCPListTools
| OpenAIResponseMessage, | OpenAIResponseMessage,
Field(union_mode="left_to_right"), Field(union_mode="left_to_right"),
] ]
@ -1275,7 +1316,7 @@ class ListOpenAIResponseInputItem(BaseModel):
:param object: Object type identifier, always "list" :param object: Object type identifier, always "list"
""" """
data: list[OpenAIResponseInput] data: Sequence[OpenAIResponseInput]
object: Literal["list"] = "list" object: Literal["list"] = "list"
@ -1286,7 +1327,7 @@ class OpenAIResponseObjectWithInput(OpenAIResponseObject):
:param input: List of input items that led to this response :param input: List of input items that led to this response
""" """
input: list[OpenAIResponseInput] input: Sequence[OpenAIResponseInput]
def to_response_object(self) -> OpenAIResponseObject: def to_response_object(self) -> OpenAIResponseObject:
"""Convert to OpenAIResponseObject by excluding input field.""" """Convert to OpenAIResponseObject by excluding input field."""
@ -1304,7 +1345,7 @@ class ListOpenAIResponseObject(BaseModel):
:param object: Object type identifier, always "list" :param object: Object type identifier, always "list"
""" """
data: list[OpenAIResponseObjectWithInput] data: Sequence[OpenAIResponseObjectWithInput]
has_more: bool has_more: bool
first_id: str first_id: str
last_id: str last_id: str

View file

@ -21,8 +21,8 @@ from typing_extensions import TypedDict
from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent
from llama_stack.apis.common.responses import Order from llama_stack.apis.common.responses import Order
from llama_stack.apis.models import Model from llama_stack.apis.models import Model
from llama_stack.apis.telemetry import MetricResponseMixin
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
from llama_stack.core.telemetry.telemetry import MetricResponseMixin
from llama_stack.core.telemetry.trace_protocol import trace_protocol from llama_stack.core.telemetry.trace_protocol import trace_protocol
from llama_stack.models.llama.datatypes import ( from llama_stack.models.llama.datatypes import (
BuiltinTool, BuiltinTool,
@ -97,7 +97,7 @@ class SamplingParams(BaseModel):
strategy: SamplingStrategy = Field(default_factory=GreedySamplingStrategy) strategy: SamplingStrategy = Field(default_factory=GreedySamplingStrategy)
max_tokens: int | None = 0 max_tokens: int | None = None
repetition_penalty: float | None = 1.0 repetition_penalty: float | None = 1.0
stop: list[str] | None = None stop: list[str] | None = None

Some files were not shown because too many files have changed in this diff.