Merge branch 'main' into fix-vector

Francisco Arceo, 2025-11-04 11:17:41 -05:00 (committed by GitHub)
commit 60b3ac7f10
508 changed files with 101100 additions and 82743 deletions


.github/actions/install-llama-stack-client/action.yml (new file)
@@ -0,0 +1,60 @@
name: Install llama-stack-client
description: Install llama-stack-client based on branch context and client-version input

inputs:
  client-version:
    description: 'Client version to install on non-release branches (latest or published). Ignored on release branches.'
    required: false
    default: ""

outputs:
  uv-extra-index-url:
    description: 'UV_EXTRA_INDEX_URL to use (set for release branches)'
    value: ${{ steps.configure.outputs.uv-extra-index-url }}
  install-after-sync:
    description: 'Whether to install client after uv sync'
    value: ${{ steps.configure.outputs.install-after-sync }}
  install-source:
    description: 'Where to install client from after sync'
    value: ${{ steps.configure.outputs.install-source }}

runs:
  using: "composite"
  steps:
    - name: Configure client installation
      id: configure
      shell: bash
      run: |
        # Determine the branch we're working with
        BRANCH="${{ github.base_ref || github.ref }}"
        BRANCH="${BRANCH#refs/heads/}"
        echo "Working with branch: $BRANCH"

        # On release branches: use test.pypi for uv sync, then install from git
        # On non-release branches: install based on client-version after sync
        if [[ "$BRANCH" =~ ^release-[0-9]+\.[0-9]+\.x$ ]]; then
          echo "Detected release branch: $BRANCH"

          # Check if matching branch exists in client repo
          if ! git ls-remote --exit-code --heads https://github.com/llamastack/llama-stack-client-python.git "$BRANCH" > /dev/null 2>&1; then
            echo "::error::Branch $BRANCH not found in llama-stack-client-python repository"
            echo "::error::Please create the matching release branch in llama-stack-client-python before testing"
            exit 1
          fi

          # Configure to use test.pypi as extra index (PyPI is primary)
          echo "uv-extra-index-url=https://test.pypi.org/simple/" >> $GITHUB_OUTPUT
          echo "install-after-sync=true" >> $GITHUB_OUTPUT
          echo "install-source=git+https://github.com/llamastack/llama-stack-client-python.git@$BRANCH" >> $GITHUB_OUTPUT
        elif [ "${{ inputs.client-version }}" = "latest" ]; then
          # Install from main git after sync
          echo "install-after-sync=true" >> $GITHUB_OUTPUT
          echo "install-source=git+https://github.com/llamastack/llama-stack-client-python.git@main" >> $GITHUB_OUTPUT
        elif [ "${{ inputs.client-version }}" = "published" ]; then
          # Use published version from PyPI (installed by sync)
          echo "install-after-sync=false" >> $GITHUB_OUTPUT
        elif [ -n "${{ inputs.client-version }}" ]; then
          echo "::error::Invalid client-version: ${{ inputs.client-version }}"
          exit 1
        fi
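A quick way to sanity-check the branch classification above (the branch names are made-up examples; the regex is the one the action uses):

```bash
# Hypothetical branch names run through the action's release-branch regex.
for BRANCH in main release-0.3.x release-1.0.x feature/fix-vector; do
  if [[ "$BRANCH" =~ ^release-[0-9]+\.[0-9]+\.x$ ]]; then
    echo "$BRANCH -> release branch: test.pypi extra index, client from matching git branch"
  else
    echo "$BRANCH -> non-release branch: behavior driven by the client-version input"
  fi
done
```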


@@ -94,7 +94,7 @@ runs:
     if: ${{ always() }}
     uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
     with:
-      name: logs-${{ github.run_id }}-${{ github.run_attempt || '' }}-${{ strategy.job-index }}
+      name: logs-${{ github.run_id }}-${{ github.run_attempt || '1' }}-${{ strategy.job-index || github.job }}-${{ github.action }}
     path: |
       *.log
     retention-days: 1


@@ -18,25 +18,35 @@ runs:
       python-version: ${{ inputs.python-version }}
       version: 0.7.6
+  - name: Configure client installation
+    id: client-config
+    uses: ./.github/actions/install-llama-stack-client
+    with:
+      client-version: ${{ inputs.client-version }}
   - name: Install dependencies
     shell: bash
+    env:
+      UV_EXTRA_INDEX_URL: ${{ steps.client-config.outputs.uv-extra-index-url }}
     run: |
+      # Export UV env vars for current step and persist to GITHUB_ENV for subsequent steps
+      if [ -n "$UV_EXTRA_INDEX_URL" ]; then
+        export UV_INDEX_STRATEGY=unsafe-best-match
+        echo "UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL" >> $GITHUB_ENV
+        echo "UV_INDEX_STRATEGY=$UV_INDEX_STRATEGY" >> $GITHUB_ENV
+        echo "Exported UV environment variables for current and subsequent steps"
+      fi
       echo "Updating project dependencies via uv sync"
       uv sync --all-groups
       echo "Installing ad-hoc dependencies"
       uv pip install faiss-cpu
-      # Install llama-stack-client-python based on the client-version input
-      if [ "${{ inputs.client-version }}" = "latest" ]; then
-        echo "Installing latest llama-stack-client-python from main branch"
-        uv pip install git+https://github.com/llamastack/llama-stack-client-python.git@main
-      elif [ "${{ inputs.client-version }}" = "published" ]; then
-        echo "Installing published llama-stack-client-python from PyPI"
-        uv pip install llama-stack-client
-      else
-        echo "Invalid client-version: ${{ inputs.client-version }}"
-        exit 1
+      # Install specific client version after sync if needed
+      if [ "${{ steps.client-config.outputs.install-after-sync }}" = "true" ]; then
+        echo "Installing llama-stack-client from: ${{ steps.client-config.outputs.install-source }}"
+        uv pip install ${{ steps.client-config.outputs.install-source }}
       fi
       echo "Installed llama packages"


@@ -42,18 +42,7 @@ runs:
   - name: Build Llama Stack
     shell: bash
     run: |
-      # Install llama-stack-client-python based on the client-version input
-      if [ "${{ inputs.client-version }}" = "latest" ]; then
-        echo "Installing latest llama-stack-client-python from main branch"
-        export LLAMA_STACK_CLIENT_DIR=git+https://github.com/llamastack/llama-stack-client-python.git@main
-      elif [ "${{ inputs.client-version }}" = "published" ]; then
-        echo "Installing published llama-stack-client-python from PyPI"
-        unset LLAMA_STACK_CLIENT_DIR
-      else
-        echo "Invalid client-version: ${{ inputs.client-version }}"
-        exit 1
-      fi
+      # Client is already installed by setup-runner (handles both main and release branches)
       echo "Building Llama Stack"
       LLAMA_STACK_DIR=. \

.github/mergify.yml (new file)

@@ -0,0 +1,23 @@
pull_request_rules:
  - name: ping author on conflicts and add 'needs-rebase' label
    conditions:
      - conflict
      - -closed
    actions:
      label:
        add:
          - needs-rebase
      comment:
        message: >
          This pull request has merge conflicts that must be resolved before it
          can be merged. @{{author}} please rebase it.
          https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/working-with-forks/syncing-a-fork
  - name: remove 'needs-rebase' label when conflict is resolved
    conditions:
      - -conflict
      - -closed
    actions:
      label:
        remove:
          - needs-rebase


@@ -4,6 +4,7 @@ Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a tabl
 | Name | File | Purpose |
 | ---- | ---- | ------- |
+| Backward Compatibility Check | [backward-compat.yml](backward-compat.yml) | Check backward compatibility for run.yaml configs |
 | Update Changelog | [changelog.yml](changelog.yml) | Creates PR for updating the CHANGELOG.md |
 | API Conformance Tests | [conformance.yml](conformance.yml) | Run the API Conformance test suite on the changes. |
 | Installer CI | [install-script-ci.yml](install-script-ci.yml) | Test the installation script |
@@ -12,7 +13,6 @@ Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a tabl
 | Integration Tests (Replay) | [integration-tests.yml](integration-tests.yml) | Run the integration test suites from tests/integration in replay mode |
 | Vector IO Integration Tests | [integration-vector-io-tests.yml](integration-vector-io-tests.yml) | Run the integration test suite with various VectorIO providers |
 | Pre-commit | [pre-commit.yml](pre-commit.yml) | Run pre-commit checks |
-| Pre-commit Bot | [precommit-trigger.yml](precommit-trigger.yml) | Pre-commit bot for PR |
 | Test Llama Stack Build | [providers-build.yml](providers-build.yml) | Test llama stack build |
 | Test llama stack list-deps | [providers-list-deps.yml](providers-list-deps.yml) | Test llama stack list-deps |
 | Python Package Build Test | [python-build-test.yml](python-build-test.yml) | Test building the llama-stack PyPI project |

.github/workflows/backward-compat.yml (new file)

@@ -0,0 +1,578 @@
name: Backward Compatibility Check
run-name: Check backward compatibility for run.yaml configs

on:
  pull_request:
    branches:
      - main
      - 'release-[0-9]+.[0-9]+.[0-9]+.[0-9]+'
      - 'release-[0-9]+.[0-9]+.[0-9]+'
      - 'release-[0-9]+.[0-9]+'
    paths:
      - 'src/llama_stack/core/datatypes.py'
      - 'src/llama_stack/providers/datatypes.py'
      - 'src/llama_stack/distributions/**/run.yaml'
      - 'tests/backward_compat/**'
      - '.github/workflows/backward-compat.yml'

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  check-main-compatibility:
    name: Check Compatibility with main
    runs-on: ubuntu-latest
    steps:
      - name: Checkout PR branch
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          fetch-depth: 0 # Need full history to access main branch
      - name: Set up Python
        uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
        with:
          python-version: '3.12'
      - name: Install uv
        uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
        with:
          enable-cache: true
      - name: Install dependencies
        run: |
          uv sync --group dev
      - name: Extract run.yaml files from main branch
        id: extract_configs
        run: |
          # Get list of run.yaml paths from main
          git fetch origin main
          CONFIG_PATHS=$(git ls-tree -r --name-only origin/main | grep "src/llama_stack/distributions/.*/run.yaml$" || true)
          if [ -z "$CONFIG_PATHS" ]; then
            echo "No run.yaml files found in main branch"
            exit 1
          fi
          # Extract all configs to a temp directory
          mkdir -p /tmp/main_configs
          echo "Extracting configs from main branch:"
          while IFS= read -r config_path; do
            if [ -z "$config_path" ]; then
              continue
            fi
            # Extract filename for storage
            filename=$(basename $(dirname "$config_path"))
            echo "  - $filename (from $config_path)"
            git show origin/main:"$config_path" > "/tmp/main_configs/${filename}.yaml"
          done <<< "$CONFIG_PATHS"
          echo ""
          echo "Extracted $(ls /tmp/main_configs/*.yaml | wc -l) config files"
      - name: Test all configs from main
        id: test_configs
        continue-on-error: true
        run: |
          # Run pytest once with all configs parameterized
          if COMPAT_TEST_CONFIGS_DIR=/tmp/main_configs uv run pytest tests/backward_compat/test_run_config.py -v; then
            echo "failed=false" >> $GITHUB_OUTPUT
          else
            echo "failed=true" >> $GITHUB_OUTPUT
            exit 1
          fi
      - name: Check for breaking change acknowledgment
        id: check_ack
        if: steps.test_configs.outputs.failed == 'true'
        run: |
          echo "Breaking changes detected. Checking for acknowledgment..."
          # Check PR title for '!:' marker (conventional commits)
          PR_TITLE="${{ github.event.pull_request.title }}"
          if [[ "$PR_TITLE" =~ ^[a-z]+\!: ]]; then
            echo "✓ Breaking change acknowledged in PR title"
            echo "acknowledged=true" >> $GITHUB_OUTPUT
            exit 0
          fi
          # Check commit messages for BREAKING CHANGE:
          if git log origin/main..HEAD --format=%B | grep -q "BREAKING CHANGE:"; then
            echo "✓ Breaking change acknowledged in commit message"
            echo "acknowledged=true" >> $GITHUB_OUTPUT
            exit 0
          fi
          echo "✗ Breaking change NOT acknowledged"
          echo "acknowledged=false" >> $GITHUB_OUTPUT
        env:
          GH_TOKEN: ${{ github.token }}
      - name: Evaluate results
        if: always()
        run: |
          FAILED="${{ steps.test_configs.outputs.failed }}"
          ACKNOWLEDGED="${{ steps.check_ack.outputs.acknowledged }}"
          if [[ "$FAILED" == "true" ]]; then
            if [[ "$ACKNOWLEDGED" == "true" ]]; then
              echo ""
              echo "⚠️ WARNING: Breaking changes detected but acknowledged"
              echo ""
              echo "This PR introduces backward-incompatible changes to run.yaml."
              echo "The changes have been properly acknowledged."
              echo ""
              exit 0 # Pass the check
            else
              echo ""
              echo "❌ ERROR: Breaking changes detected without acknowledgment"
              echo ""
              echo "This PR introduces backward-incompatible changes to run.yaml"
              echo "that will break existing user configurations."
              echo ""
              echo "To acknowledge this breaking change, do ONE of:"
              echo "  1. Add '!:' to your PR title (e.g., 'feat!: change xyz')"
              echo "  2. Add the 'breaking-change' label to this PR"
              echo "  3. Include 'BREAKING CHANGE:' in a commit message"
              echo ""
              exit 1 # Fail the check
            fi
          fi

  test-integration-main:
    name: Run Integration Tests with main Config
    runs-on: ubuntu-latest
    steps:
      - name: Checkout PR branch
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          fetch-depth: 0
      - name: Extract ci-tests run.yaml from main
        run: |
          git fetch origin main
          git show origin/main:src/llama_stack/distributions/ci-tests/run.yaml > /tmp/main-ci-tests-run.yaml
          echo "Extracted ci-tests run.yaml from main branch"
      - name: Setup test environment
        uses: ./.github/actions/setup-test-environment
        with:
          python-version: '3.12'
          client-version: 'latest'
          setup: 'ollama'
          suite: 'base'
          inference-mode: 'replay'
      - name: Run integration tests with main config
        id: test_integration
        continue-on-error: true
        uses: ./.github/actions/run-and-record-tests
        with:
          stack-config: /tmp/main-ci-tests-run.yaml
          setup: 'ollama'
          inference-mode: 'replay'
          suite: 'base'
      - name: Check for breaking change acknowledgment
        id: check_ack
        if: steps.test_integration.outcome == 'failure'
        run: |
          echo "Integration tests failed. Checking for acknowledgment..."
          # Check PR title for '!:' marker (conventional commits)
          PR_TITLE="${{ github.event.pull_request.title }}"
          if [[ "$PR_TITLE" =~ ^[a-z]+\!: ]]; then
            echo "✓ Breaking change acknowledged in PR title"
            echo "acknowledged=true" >> $GITHUB_OUTPUT
            exit 0
          fi
          # Check commit messages for BREAKING CHANGE:
          if git log origin/main..HEAD --format=%B | grep -q "BREAKING CHANGE:"; then
            echo "✓ Breaking change acknowledged in commit message"
            echo "acknowledged=true" >> $GITHUB_OUTPUT
            exit 0
          fi
          echo "✗ Breaking change NOT acknowledged"
          echo "acknowledged=false" >> $GITHUB_OUTPUT
        env:
          GH_TOKEN: ${{ github.token }}
      - name: Evaluate integration test results
        if: always()
        run: |
          TEST_FAILED="${{ steps.test_integration.outcome == 'failure' }}"
          ACKNOWLEDGED="${{ steps.check_ack.outputs.acknowledged }}"
          if [[ "$TEST_FAILED" == "true" ]]; then
            if [[ "$ACKNOWLEDGED" == "true" ]]; then
              echo ""
              echo "⚠️ WARNING: Integration tests failed with main config but acknowledged"
              echo ""
              exit 0 # Pass the check
            else
              echo ""
              echo "❌ ERROR: Integration tests failed with main config without acknowledgment"
              echo ""
              echo "To acknowledge this breaking change, do ONE of:"
              echo "  1. Add '!:' to your PR title (e.g., 'feat!: change xyz')"
              echo "  2. Include 'BREAKING CHANGE:' in a commit message"
              echo ""
              exit 1 # Fail the check
            fi
          fi

  test-integration-release:
    name: Run Integration Tests with Latest Release (Informational)
    runs-on: ubuntu-latest
    steps:
      - name: Checkout PR branch
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          fetch-depth: 0
      - name: Get latest release
        id: get_release
        run: |
          # Get the latest release from GitHub
          LATEST_TAG=$(gh release list --limit 1 --json tagName --jq '.[0].tagName' 2>/dev/null || echo "")
          if [ -z "$LATEST_TAG" ]; then
            echo "No releases found, skipping release compatibility check"
            echo "has_release=false" >> $GITHUB_OUTPUT
            exit 0
          fi
          echo "Latest release: $LATEST_TAG"
          echo "has_release=true" >> $GITHUB_OUTPUT
          echo "tag=$LATEST_TAG" >> $GITHUB_OUTPUT
        env:
          GH_TOKEN: ${{ github.token }}
      - name: Extract ci-tests run.yaml from release
        if: steps.get_release.outputs.has_release == 'true'
        id: extract_config
        run: |
          RELEASE_TAG="${{ steps.get_release.outputs.tag }}"
          # Try with src/ prefix first (newer releases), then without (older releases)
          if git show "$RELEASE_TAG:src/llama_stack/distributions/ci-tests/run.yaml" > /tmp/release-ci-tests-run.yaml 2>/dev/null; then
            echo "Extracted ci-tests run.yaml from release $RELEASE_TAG (src/ path)"
            echo "has_config=true" >> $GITHUB_OUTPUT
          elif git show "$RELEASE_TAG:llama_stack/distributions/ci-tests/run.yaml" > /tmp/release-ci-tests-run.yaml 2>/dev/null; then
            echo "Extracted ci-tests run.yaml from release $RELEASE_TAG (old path)"
            echo "has_config=true" >> $GITHUB_OUTPUT
          else
            echo "::warning::ci-tests/run.yaml not found in release $RELEASE_TAG"
            echo "has_config=false" >> $GITHUB_OUTPUT
          fi
      - name: Setup test environment
        if: steps.get_release.outputs.has_release == 'true' && steps.extract_config.outputs.has_config == 'true'
        uses: ./.github/actions/setup-test-environment
        with:
          python-version: '3.12'
          client-version: 'latest'
          setup: 'ollama'
          suite: 'base'
          inference-mode: 'replay'
      - name: Run integration tests with release config (PR branch)
        id: test_release_pr
        if: steps.get_release.outputs.has_release == 'true' && steps.extract_config.outputs.has_config == 'true'
        continue-on-error: true
        uses: ./.github/actions/run-and-record-tests
        with:
          stack-config: /tmp/release-ci-tests-run.yaml
          setup: 'ollama'
          inference-mode: 'replay'
          suite: 'base'
      - name: Checkout main branch to test baseline
        if: steps.get_release.outputs.has_release == 'true' && steps.extract_config.outputs.has_config == 'true'
        run: |
          git checkout origin/main
      - name: Setup test environment for main
        if: steps.get_release.outputs.has_release == 'true' && steps.extract_config.outputs.has_config == 'true'
        uses: ./.github/actions/setup-test-environment
        with:
          python-version: '3.12'
          client-version: 'latest'
          setup: 'ollama'
          suite: 'base'
          inference-mode: 'replay'
      - name: Run integration tests with release config (main branch)
        id: test_release_main
        if: steps.get_release.outputs.has_release == 'true' && steps.extract_config.outputs.has_config == 'true'
        continue-on-error: true
        uses: ./.github/actions/run-and-record-tests
        with:
          stack-config: /tmp/release-ci-tests-run.yaml
          setup: 'ollama'
          inference-mode: 'replay'
          suite: 'base'
      - name: Report results and post PR comment
        if: always() && steps.get_release.outputs.has_release == 'true' && steps.extract_config.outputs.has_config == 'true'
        run: |
          RELEASE_TAG="${{ steps.get_release.outputs.tag }}"
          PR_OUTCOME="${{ steps.test_release_pr.outcome }}"
          MAIN_OUTCOME="${{ steps.test_release_main.outcome }}"
          if [[ "$PR_OUTCOME" == "failure" && "$MAIN_OUTCOME" == "success" ]]; then
            # NEW breaking change - PR fails but main passes
            echo "::error::🚨 This PR introduces a NEW breaking change!"
            # Check if we already posted a comment (to avoid spam on every push)
            EXISTING_COMMENT=$(gh pr view ${{ github.event.pull_request.number }} --json comments --jq '.comments[] | select(.body | contains("🚨 New Breaking Change Detected") and contains("Integration tests")) | .id' | head -1)
            if [[ -z "$EXISTING_COMMENT" ]]; then
              gh pr comment ${{ github.event.pull_request.number }} --body "## 🚨 New Breaking Change Detected
          **Integration tests against release \`$RELEASE_TAG\` are now failing**
          ⚠️ This PR introduces a breaking change that affects compatibility with the latest release.
          - Users on release \`$RELEASE_TAG\` may not be able to upgrade
          - Existing configurations may break
          The tests pass on \`main\` but fail with this PR's changes.
          > **Note:** This is informational only and does not block merge.
          > Consider whether this breaking change is acceptable for users."
            else
              echo "Comment already exists, skipping to avoid spam"
            fi
            cat >> $GITHUB_STEP_SUMMARY <<EOF
          ## 🚨 NEW Breaking Change Detected
          **Integration tests against release \`$RELEASE_TAG\` FAILED**
          ⚠️ **This PR introduces a NEW breaking change**
          - Tests **PASS** on main branch ✅
          - Tests **FAIL** on PR branch ❌
          - Users on release \`$RELEASE_TAG\` may not be able to upgrade
          - Existing configurations may break
          > **Note:** This is informational only and does not block merge.
          > Consider whether this breaking change is acceptable for users.
          EOF
          elif [[ "$PR_OUTCOME" == "failure" ]]; then
            # Existing breaking change - both PR and main fail
            echo "::warning::Breaking change already exists in main branch"
            cat >> $GITHUB_STEP_SUMMARY <<EOF
          ## ⚠️ Release Compatibility Test Failed (Existing Issue)
          **Integration tests against release \`$RELEASE_TAG\` FAILED**
          - Tests **FAIL** on main branch ❌
          - Tests **FAIL** on PR branch ❌
          - This breaking change already exists in main (not introduced by this PR)
          > **Note:** This is informational only.
          EOF
          else
            # Success - tests pass
            cat >> $GITHUB_STEP_SUMMARY <<EOF
          ## ✅ Release Compatibility Test Passed
          Integration tests against release \`$RELEASE_TAG\` passed successfully.
          This PR maintains compatibility with the latest release.
          EOF
          fi
        env:
          GH_TOKEN: ${{ github.token }}

  check-schema-release-compatibility:
    name: Check Schema Compatibility with Latest Release (Informational)
    runs-on: ubuntu-latest
    steps:
      - name: Checkout PR branch
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          fetch-depth: 0
      - name: Set up Python
        uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
        with:
          python-version: '3.12'
      - name: Install uv
        uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
        with:
          enable-cache: true
      - name: Install dependencies
        run: |
          uv sync --group dev
      - name: Get latest release
        id: get_release
        run: |
          # Get the latest release from GitHub
          LATEST_TAG=$(gh release list --limit 1 --json tagName --jq '.[0].tagName' 2>/dev/null || echo "")
          if [ -z "$LATEST_TAG" ]; then
            echo "No releases found, skipping release compatibility check"
            echo "has_release=false" >> $GITHUB_OUTPUT
            exit 0
          fi
          echo "Latest release: $LATEST_TAG"
          echo "has_release=true" >> $GITHUB_OUTPUT
          echo "tag=$LATEST_TAG" >> $GITHUB_OUTPUT
        env:
          GH_TOKEN: ${{ github.token }}
      - name: Extract configs from release
        if: steps.get_release.outputs.has_release == 'true'
        id: extract_release_configs
        run: |
          RELEASE_TAG="${{ steps.get_release.outputs.tag }}"
          # Get run.yaml files from the release (try both src/ and old path)
          CONFIG_PATHS=$(git ls-tree -r --name-only "$RELEASE_TAG" | grep "llama_stack/distributions/.*/run.yaml$" || true)
          if [ -z "$CONFIG_PATHS" ]; then
            echo "::warning::No run.yaml files found in release $RELEASE_TAG"
            echo "has_configs=false" >> $GITHUB_OUTPUT
            exit 0
          fi
          # Extract all configs to a temp directory
          mkdir -p /tmp/release_configs
          echo "Extracting configs from release $RELEASE_TAG:"
          while IFS= read -r config_path; do
            if [ -z "$config_path" ]; then
              continue
            fi
            filename=$(basename $(dirname "$config_path"))
            echo "  - $filename (from $config_path)"
            git show "$RELEASE_TAG:$config_path" > "/tmp/release_configs/${filename}.yaml" 2>/dev/null || true
          done <<< "$CONFIG_PATHS"
          echo ""
          echo "Extracted $(ls /tmp/release_configs/*.yaml 2>/dev/null | wc -l) config files"
          echo "has_configs=true" >> $GITHUB_OUTPUT
      - name: Test against release configs (PR branch)
        id: test_schema_pr
        if: steps.get_release.outputs.has_release == 'true' && steps.extract_release_configs.outputs.has_configs == 'true'
        continue-on-error: true
        run: |
          RELEASE_TAG="${{ steps.get_release.outputs.tag }}"
          COMPAT_TEST_CONFIGS_DIR=/tmp/release_configs uv run pytest tests/backward_compat/test_run_config.py -v --tb=short
      - name: Checkout main branch to test baseline
        if: steps.get_release.outputs.has_release == 'true' && steps.extract_release_configs.outputs.has_configs == 'true'
        run: |
          git checkout origin/main
      - name: Install dependencies for main
        if: steps.get_release.outputs.has_release == 'true' && steps.extract_release_configs.outputs.has_configs == 'true'
        run: |
          uv sync --group dev
      - name: Test against release configs (main branch)
        id: test_schema_main
        if: steps.get_release.outputs.has_release == 'true' && steps.extract_release_configs.outputs.has_configs == 'true'
        continue-on-error: true
        run: |
          RELEASE_TAG="${{ steps.get_release.outputs.tag }}"
          COMPAT_TEST_CONFIGS_DIR=/tmp/release_configs uv run pytest tests/backward_compat/test_run_config.py -v --tb=short
      - name: Report results and post PR comment
        if: always() && steps.get_release.outputs.has_release == 'true' && steps.extract_release_configs.outputs.has_configs == 'true'
        run: |
          RELEASE_TAG="${{ steps.get_release.outputs.tag }}"
          PR_OUTCOME="${{ steps.test_schema_pr.outcome }}"
          MAIN_OUTCOME="${{ steps.test_schema_main.outcome }}"
          if [[ "$PR_OUTCOME" == "failure" && "$MAIN_OUTCOME" == "success" ]]; then
            # NEW breaking change - PR fails but main passes
            echo "::error::🚨 This PR introduces a NEW schema breaking change!"
            # Check if we already posted a comment (to avoid spam on every push)
            EXISTING_COMMENT=$(gh pr view ${{ github.event.pull_request.number }} --json comments --jq '.comments[] | select(.body | contains("🚨 New Schema Breaking Change Detected")) | .id' | head -1)
            if [[ -z "$EXISTING_COMMENT" ]]; then
              gh pr comment ${{ github.event.pull_request.number }} --body "## 🚨 New Schema Breaking Change Detected
          **Schema validation against release \`$RELEASE_TAG\` is now failing**
          ⚠️ This PR introduces a schema breaking change that affects compatibility with the latest release.
          - Users on release \`$RELEASE_TAG\` will not be able to upgrade
          - Existing run.yaml configurations will fail validation
          The tests pass on \`main\` but fail with this PR's changes.
          > **Note:** This is informational only and does not block merge.
          > Consider whether this breaking change is acceptable for users."
            else
              echo "Comment already exists, skipping to avoid spam"
            fi
            cat >> $GITHUB_STEP_SUMMARY <<EOF
          ## 🚨 NEW Schema Breaking Change Detected
          **Schema validation against release \`$RELEASE_TAG\` FAILED**
          ⚠️ **This PR introduces a NEW schema breaking change**
          - Tests **PASS** on main branch ✅
          - Tests **FAIL** on PR branch ❌
          - Users on release \`$RELEASE_TAG\` will not be able to upgrade
          - Existing run.yaml configurations will fail validation
          > **Note:** This is informational only and does not block merge.
          > Consider whether this breaking change is acceptable for users.
          EOF
          elif [[ "$PR_OUTCOME" == "failure" ]]; then
            # Existing breaking change - both PR and main fail
            echo "::warning::Schema breaking change already exists in main branch"
            cat >> $GITHUB_STEP_SUMMARY <<EOF
          ## ⚠️ Release Schema Compatibility Failed (Existing Issue)
          **Schema validation against release \`$RELEASE_TAG\` FAILED**
          - Tests **FAIL** on main branch ❌
          - Tests **FAIL** on PR branch ❌
          - This schema breaking change already exists in main (not introduced by this PR)
          > **Note:** This is informational only.
          EOF
          else
            # Success - tests pass
            cat >> $GITHUB_STEP_SUMMARY <<EOF
          ## ✅ Release Schema Compatibility Passed
          All run.yaml configs from release \`$RELEASE_TAG\` are compatible.
          This PR maintains backward compatibility with the latest release.
          EOF
          fi
        env:
          GH_TOKEN: ${{ github.token }}
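The acknowledgment rules used by the jobs above can be exercised locally; this sketch uses an assumed PR title but the same regex and grep the workflow runs:

```bash
# Assumed example title; in CI this comes from github.event.pull_request.title.
PR_TITLE='feat!: remove deprecated run.yaml field'

if [[ "$PR_TITLE" =~ ^[a-z]+\!: ]]; then
  echo "acknowledged via the conventional-commit '!' marker in the PR title"
elif git log origin/main..HEAD --format=%B | grep -q "BREAKING CHANGE:"; then
  echo "acknowledged via a BREAKING CHANGE: footer in a commit message"
else
  echo "breaking change NOT acknowledged"
fi
```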


.github/workflows/conformance.yml
@@ -22,7 +22,6 @@ on:
       - 'docs/static/stable-llama-stack-spec.yaml' # Stable APIs spec
       - 'docs/static/experimental-llama-stack-spec.yaml' # Experimental APIs spec
       - 'docs/static/deprecated-llama-stack-spec.yaml' # Deprecated APIs spec
-      - 'docs/static/llama-stack-spec.html' # Legacy HTML spec
       - '.github/workflows/conformance.yml' # This workflow itself

 concurrency:


.github/workflows/install-script-ci.yml
@@ -30,10 +30,16 @@ jobs:
       - name: Build a single provider
         run: |
+          BUILD_ARGS="--build-arg INSTALL_MODE=editable --build-arg DISTRO_NAME=starter"
+          if [ -n "${UV_EXTRA_INDEX_URL:-}" ]; then
+            BUILD_ARGS="$BUILD_ARGS --build-arg UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL"
+          fi
+          if [ -n "${UV_INDEX_STRATEGY:-}" ]; then
+            BUILD_ARGS="$BUILD_ARGS --build-arg UV_INDEX_STRATEGY=$UV_INDEX_STRATEGY"
+          fi
           docker build . \
             -f containers/Containerfile \
-            --build-arg INSTALL_MODE=editable \
-            --build-arg DISTRO_NAME=starter \
+            $BUILD_ARGS \
             --tag llama-stack:starter-ci
       - name: Run installer end-to-end
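One detail worth noting in this pattern: `$BUILD_ARGS` is expanded unquoted on purpose, so each `--build-arg KEY=VAL` reaches docker as a separate word (safe here because none of the values contain spaces). A minimal illustration:

```bash
# Unquoted expansion splits into the separate words docker expects;
# quoting would hand docker one long argument and the build would fail.
BUILD_ARGS="--build-arg A=1 --build-arg B=2"
printf '%s\n' $BUILD_ARGS     # four words
printf '%s\n' "$BUILD_ARGS"   # one word
```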


@@ -4,9 +4,13 @@ run-name: Run the integration test suite with Kubernetes authentication
 on:
   push:
-    branches: [ main ]
+    branches:
+      - main
+      - 'release-[0-9]+.[0-9]+.x'
   pull_request:
-    branches: [ main ]
+    branches:
+      - main
+      - 'release-[0-9]+.[0-9]+.x'
     paths:
       - 'distributions/**'
       - 'src/llama_stack/**'


@@ -4,9 +4,13 @@ run-name: Run the integration test suite with SqlStore
 on:
   push:
-    branches: [ main ]
+    branches:
+      - main
+      - 'release-[0-9]+.[0-9]+.x'
   pull_request:
-    branches: [ main ]
+    branches:
+      - main
+      - 'release-[0-9]+.[0-9]+.x'
     paths:
       - 'src/llama_stack/providers/utils/sqlstore/**'
       - 'tests/integration/sqlstore/**'


.github/workflows/integration-tests.yml
@@ -4,9 +4,13 @@ run-name: Run the integration test suites from tests/integration in replay mode
 on:
   push:
-    branches: [ main ]
+    branches:
+      - main
+      - 'release-[0-9]+.[0-9]+.x'
   pull_request:
-    branches: [ main ]
+    branches:
+      - main
+      - 'release-[0-9]+.[0-9]+.x'
     types: [opened, synchronize, reopened]
     paths:
       - 'src/llama_stack/**'
@@ -18,6 +22,7 @@ on:
       - '.github/actions/setup-ollama/action.yml'
      - '.github/actions/setup-test-environment/action.yml'
       - '.github/actions/run-and-record-tests/action.yml'
+      - 'scripts/integration-tests.sh'
   schedule:
     # If changing the cron schedule, update the provider in the test-matrix job
     - cron: '0 0 * * *' # (test latest client) Daily at 12 AM UTC
@@ -47,7 +52,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        client-type: [library, docker]
+        client-type: [library, docker, server]
         # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
         python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
         client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}


.github/workflows/integration-vector-io-tests.yml
@@ -4,9 +4,13 @@ run-name: Run the integration test suite with various VectorIO providers
 on:
   push:
-    branches: [ main ]
+    branches:
+      - main
+      - 'release-[0-9]+.[0-9]+.x'
   pull_request:
-    branches: [ main ]
+    branches:
+      - main
+      - 'release-[0-9]+.[0-9]+.x'
     paths:
       - 'src/llama_stack/**'
       - '!src/llama_stack/ui/**'


.github/workflows/pre-commit.yml
@@ -5,7 +5,9 @@ run-name: Run pre-commit checks
 on:
   pull_request:
   push:
-    branches: [main]
+    branches:
+      - main
+      - 'release-[0-9]+.[0-9]+.x'

 concurrency:
   group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}
@@ -43,23 +45,41 @@ jobs:
           cache: 'npm'
           cache-dependency-path: 'src/llama_stack/ui/'
+      - name: Set up uv
+        uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
       - name: Install npm dependencies
         run: npm ci
         working-directory: src/llama_stack/ui
+      - name: Install pre-commit
+        run: python -m pip install pre-commit
+      - name: Cache pre-commit
+        uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4
+        with:
+          path: ~/.cache/pre-commit
+          key: pre-commit-3|${{ env.pythonLocation }}|${{ hashFiles('.pre-commit-config.yaml') }}
       - name: Run pre-commit
         id: precommit
-        uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
-        continue-on-error: true
+        run: |
+          set +e
+          pre-commit run --show-diff-on-failure --color=always --all-files 2>&1 | tee /tmp/precommit.log
+          status=${PIPESTATUS[0]}
+          echo "status=$status" >> $GITHUB_OUTPUT
+          exit 0
         env:
-          SKIP: no-commit-to-branch
+          SKIP: no-commit-to-branch,mypy
           RUFF_OUTPUT_FORMAT: github
       - name: Check pre-commit results
-        if: steps.precommit.outcome == 'failure'
+        if: steps.precommit.outputs.status != '0'
         run: |
           echo "::error::Pre-commit hooks failed. Please run 'pre-commit run --all-files' locally and commit the fixes."
-          echo "::warning::Some pre-commit hooks failed. Check the output above for details."
+          echo ""
+          echo "Failed hooks output:"
+          cat /tmp/precommit.log
           exit 1
       - name: Debug
@@ -109,3 +129,39 @@ jobs:
             echo "$unstaged_files"
             exit 1
           fi
+      - name: Configure client installation
+        id: client-config
+        uses: ./.github/actions/install-llama-stack-client
+      - name: Sync dev + type_checking dependencies
+        env:
+          UV_EXTRA_INDEX_URL: ${{ steps.client-config.outputs.uv-extra-index-url }}
+        run: |
+          if [ -n "$UV_EXTRA_INDEX_URL" ]; then
+            export UV_INDEX_STRATEGY="unsafe-best-match"
+          fi
+          uv sync --group dev --group type_checking
+          # Install specific client version after sync if needed
+          if [ "${{ steps.client-config.outputs.install-after-sync }}" = "true" ]; then
+            echo "Installing llama-stack-client from: ${{ steps.client-config.outputs.install-source }}"
+            uv pip install ${{ steps.client-config.outputs.install-source }}
+          fi
+      - name: Run mypy (full type_checking)
+        env:
+          UV_EXTRA_INDEX_URL: ${{ steps.client-config.outputs.uv-extra-index-url }}
+        run: |
+          if [ -n "$UV_EXTRA_INDEX_URL" ]; then
+            export UV_INDEX_STRATEGY="unsafe-best-match"
+          fi
+          set +e
+          uv run --group dev --group type_checking mypy
+          status=$?
+          if [ $status -ne 0 ]; then
+            echo "::error::Full mypy failed. Reproduce locally with 'uv run pre-commit run mypy-full --hook-stage manual --all-files'."
+          fi
+          exit $status
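The `PIPESTATUS[0]` capture in the new Run pre-commit step exists because in a pipeline such as `pre-commit ... | tee log`, `$?` reports tee's exit code rather than pre-commit's. A minimal demonstration:

```bash
set +e
false | tee /tmp/out.log
echo "plain \$?: $?"                    # 0 -- tee succeeded
false | tee /tmp/out.log
echo "PIPESTATUS[0]: ${PIPESTATUS[0]}"  # 1 -- the real exit code of 'false'
```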


.github/workflows/precommit-trigger.yml (deleted)
@@ -1,227 +0,0 @@
name: Pre-commit Bot
run-name: Pre-commit bot for PR #${{ github.event.issue.number }}

on:
  issue_comment:
    types: [created]

jobs:
  pre-commit:
    # Only run on pull request comments
    if: github.event.issue.pull_request && contains(github.event.comment.body, '@github-actions run precommit')
    runs-on: ubuntu-latest
    permissions:
      contents: write
      pull-requests: write
    steps:
      - name: Check comment author and get PR details
        id: check_author
        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            // Get PR details
            const pr = await github.rest.pulls.get({
              owner: context.repo.owner,
              repo: context.repo.repo,
              pull_number: context.issue.number
            });
            // Check if commenter has write access or is the PR author
            const commenter = context.payload.comment.user.login;
            const prAuthor = pr.data.user.login;
            let hasPermission = false;
            // Check if commenter is PR author
            if (commenter === prAuthor) {
              hasPermission = true;
              console.log(`Comment author ${commenter} is the PR author`);
            } else {
              // Check if commenter has write/admin access
              try {
                const permission = await github.rest.repos.getCollaboratorPermissionLevel({
                  owner: context.repo.owner,
                  repo: context.repo.repo,
                  username: commenter
                });
                const level = permission.data.permission;
                hasPermission = ['write', 'admin', 'maintain'].includes(level);
                console.log(`Comment author ${commenter} has permission: ${level}`);
              } catch (error) {
                console.log(`Could not check permissions for ${commenter}: ${error.message}`);
              }
            }
            if (!hasPermission) {
              await github.rest.issues.createComment({
                owner: context.repo.owner,
                repo: context.repo.repo,
                issue_number: context.issue.number,
                body: `❌ @${commenter} You don't have permission to trigger pre-commit. Only PR authors or repository collaborators can run this command.`
              });
              core.setFailed(`User ${commenter} does not have permission`);
              return;
            }
            // Save PR info for later steps
            core.setOutput('pr_number', context.issue.number);
            core.setOutput('pr_head_ref', pr.data.head.ref);
            core.setOutput('pr_head_sha', pr.data.head.sha);
            core.setOutput('pr_head_repo', pr.data.head.repo.full_name);
            core.setOutput('pr_base_ref', pr.data.base.ref);
            core.setOutput('is_fork', pr.data.head.repo.full_name !== context.payload.repository.full_name);
            core.setOutput('authorized', 'true');
      - name: React to comment
        if: steps.check_author.outputs.authorized == 'true'
        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            await github.rest.reactions.createForIssueComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              comment_id: context.payload.comment.id,
              content: 'rocket'
            });
      - name: Comment starting
        if: steps.check_author.outputs.authorized == 'true'
        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: ${{ steps.check_author.outputs.pr_number }},
              body: `⏳ Running [pre-commit hooks](https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}) on PR #${{ steps.check_author.outputs.pr_number }}...`
            });
      - name: Checkout PR branch (same-repo)
        if: steps.check_author.outputs.authorized == 'true' && steps.check_author.outputs.is_fork == 'false'
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          ref: ${{ steps.check_author.outputs.pr_head_ref }}
          fetch-depth: 0
          token: ${{ secrets.GITHUB_TOKEN }}
      - name: Checkout PR branch (fork)
        if: steps.check_author.outputs.authorized == 'true' && steps.check_author.outputs.is_fork == 'true'
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          repository: ${{ steps.check_author.outputs.pr_head_repo }}
          ref: ${{ steps.check_author.outputs.pr_head_ref }}
          fetch-depth: 0
          token: ${{ secrets.GITHUB_TOKEN }}
      - name: Verify checkout
        if: steps.check_author.outputs.authorized == 'true'
        run: |
          echo "Current SHA: $(git rev-parse HEAD)"
          echo "Expected SHA: ${{ steps.check_author.outputs.pr_head_sha }}"
          if [[ "$(git rev-parse HEAD)" != "${{ steps.check_author.outputs.pr_head_sha }}" ]]; then
            echo "::error::Checked out SHA does not match expected SHA"
            exit 1
          fi
      - name: Set up Python
        if: steps.check_author.outputs.authorized == 'true'
        uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
        with:
          python-version: '3.12'
          cache: pip
          cache-dependency-path: |
            **/requirements*.txt
            .pre-commit-config.yaml
      - name: Set up Node.js
        if: steps.check_author.outputs.authorized == 'true'
        uses: actions/setup-node@2028fbc5c25fe9cf00d9f06a71cc4710d4507903 # v6.0.0
        with:
          node-version: '20'
          cache: 'npm'
          cache-dependency-path: 'src/llama_stack/ui/'
      - name: Install npm dependencies
        if: steps.check_author.outputs.authorized == 'true'
        run: npm ci
        working-directory: src/llama_stack/ui
      - name: Run pre-commit
        if: steps.check_author.outputs.authorized == 'true'
        id: precommit
        uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
        continue-on-error: true
        env:
          SKIP: no-commit-to-branch
          RUFF_OUTPUT_FORMAT: github
      - name: Check for changes
        if: steps.check_author.outputs.authorized == 'true'
        id: changes
        run: |
          if ! git diff --exit-code || [ -n "$(git ls-files --others --exclude-standard)" ]; then
            echo "has_changes=true" >> $GITHUB_OUTPUT
            echo "Changes detected after pre-commit"
          else
            echo "has_changes=false" >> $GITHUB_OUTPUT
            echo "No changes after pre-commit"
          fi
      - name: Commit and push changes
        if: steps.check_author.outputs.authorized == 'true' && steps.changes.outputs.has_changes == 'true'
        run: |
          git config --local user.email "github-actions[bot]@users.noreply.github.com"
          git config --local user.name "github-actions[bot]"
          git add -A
          git commit -m "style: apply pre-commit fixes
          🤖 Applied by @github-actions bot via pre-commit workflow"
          # Push changes
          git push origin HEAD:${{ steps.check_author.outputs.pr_head_ref }}
      - name: Comment success with changes
        if: steps.check_author.outputs.authorized == 'true' && steps.changes.outputs.has_changes == 'true'
        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: ${{ steps.check_author.outputs.pr_number }},
              body: `✅ Pre-commit hooks completed successfully!\n\n🔧 Changes have been committed and pushed to the PR branch.`
            });
      - name: Comment success without changes
        if: steps.check_author.outputs.authorized == 'true' && steps.changes.outputs.has_changes == 'false' && steps.precommit.outcome == 'success'
        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: ${{ steps.check_author.outputs.pr_number }},
              body: `✅ Pre-commit hooks passed!\n\n✨ No changes needed - your code is already formatted correctly.`
            });
      - name: Comment failure
        if: failure()
        uses: actions/github-script@ed597411d8f924073f98dfc5c65a23a2325f34cd # v8.0.0
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          script: |
            await github.rest.issues.createComment({
              owner: context.repo.owner,
              repo: context.repo.repo,
              issue_number: ${{ steps.check_author.outputs.pr_number }},
              body: `❌ Pre-commit workflow failed!\n\nPlease check the [workflow logs](https://github.com/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}) for details.`
            });


@@ -72,10 +72,16 @@ jobs:
       - name: Build container image
         if: matrix.image-type == 'container'
         run: |
+          BUILD_ARGS="--build-arg INSTALL_MODE=editable --build-arg DISTRO_NAME=${{ matrix.distro }}"
+          if [ -n "${UV_EXTRA_INDEX_URL:-}" ]; then
+            BUILD_ARGS="$BUILD_ARGS --build-arg UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL"
+          fi
+          if [ -n "${UV_INDEX_STRATEGY:-}" ]; then
+            BUILD_ARGS="$BUILD_ARGS --build-arg UV_INDEX_STRATEGY=$UV_INDEX_STRATEGY"
+          fi
           docker build . \
             -f containers/Containerfile \
-            --build-arg INSTALL_MODE=editable \
-            --build-arg DISTRO_NAME=${{ matrix.distro }} \
+            $BUILD_ARGS \
             --tag llama-stack:${{ matrix.distro }}-ci
       - name: Print dependencies in the image
@@ -108,12 +114,18 @@ jobs:
       - name: Build container image
         run: |
           BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "python:3.12-slim"' src/llama_stack/distributions/ci-tests/build.yaml)
+          BUILD_ARGS="--build-arg INSTALL_MODE=editable --build-arg DISTRO_NAME=ci-tests"
+          BUILD_ARGS="$BUILD_ARGS --build-arg BASE_IMAGE=$BASE_IMAGE"
+          BUILD_ARGS="$BUILD_ARGS --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/run.yaml"
+          if [ -n "${UV_EXTRA_INDEX_URL:-}" ]; then
+            BUILD_ARGS="$BUILD_ARGS --build-arg UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL"
+          fi
+          if [ -n "${UV_INDEX_STRATEGY:-}" ]; then
+            BUILD_ARGS="$BUILD_ARGS --build-arg UV_INDEX_STRATEGY=$UV_INDEX_STRATEGY"
+          fi
           docker build . \
             -f containers/Containerfile \
-            --build-arg INSTALL_MODE=editable \
-            --build-arg DISTRO_NAME=ci-tests \
-            --build-arg BASE_IMAGE="$BASE_IMAGE" \
-            --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/run.yaml \
+            $BUILD_ARGS \
             -t llama-stack:ci-tests
       - name: Inspect the container image entrypoint
@@ -148,12 +160,18 @@ jobs:
       - name: Build UBI9 container image
         run: |
           BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "registry.access.redhat.com/ubi9:latest"' src/llama_stack/distributions/ci-tests/build.yaml)
+          BUILD_ARGS="--build-arg INSTALL_MODE=editable --build-arg DISTRO_NAME=ci-tests"
+          BUILD_ARGS="$BUILD_ARGS --build-arg BASE_IMAGE=$BASE_IMAGE"
+          BUILD_ARGS="$BUILD_ARGS --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/run.yaml"
+          if [ -n "${UV_EXTRA_INDEX_URL:-}" ]; then
+            BUILD_ARGS="$BUILD_ARGS --build-arg UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL"
+          fi
+          if [ -n "${UV_INDEX_STRATEGY:-}" ]; then
+            BUILD_ARGS="$BUILD_ARGS --build-arg UV_INDEX_STRATEGY=$UV_INDEX_STRATEGY"
+          fi
           docker build . \
             -f containers/Containerfile \
-            --build-arg INSTALL_MODE=editable \
-            --build-arg DISTRO_NAME=ci-tests \
-            --build-arg BASE_IMAGE="$BASE_IMAGE" \
-            --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/run.yaml \
+            $BUILD_ARGS \
             -t llama-stack:ci-tests-ubi9
       - name: Inspect UBI9 image


@@ -24,7 +24,7 @@ jobs:
         uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
       - name: Install uv
-        uses: astral-sh/setup-uv@2ddd2b9cb38ad8efd50337e8ab201519a34c9f24 # v7.1.1
+        uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2
         with:
           python-version: ${{ matrix.python-version }}
           activate-environment: true


@@ -4,9 +4,13 @@ run-name: Run the unit test suite
 on:
   push:
-    branches: [ main ]
+    branches:
+      - main
+      - 'release-[0-9]+.[0-9]+.x'
   pull_request:
-    branches: [ main ]
+    branches:
+      - main
+      - 'release-[0-9]+.[0-9]+.x'
     paths:
       - 'src/llama_stack/**'
       - '!src/llama_stack/ui/**'

.gitignore

@@ -32,3 +32,6 @@ CLAUDE.md
 docs/.docusaurus/
 docs/node_modules/
 docs/static/imported-files/
+docs/docs/api-deprecated/
+docs/docs/api-experimental/
+docs/docs/api/


.pre-commit-config.yaml
@@ -52,13 +52,9 @@ repos:
       additional_dependencies:
         - black==24.3.0
-  - repo: https://github.com/astral-sh/uv-pre-commit
-    rev: 0.7.20
-    hooks:
-      - id: uv-lock
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.16.1
+    rev: v1.18.2
     hooks:
       - id: mypy
         additional_dependencies:
@@ -78,11 +74,26 @@ repos:
   - repo: local
     hooks:
+      - id: uv-lock
+        name: uv-lock
+        additional_dependencies:
+          - uv==0.7.20
+        entry: ./scripts/uv-run-with-index.sh lock
+        language: python
+        pass_filenames: false
+        require_serial: true
+        files: ^(pyproject\.toml|uv\.lock)$
+      - id: mypy-full
+        name: mypy (full type_checking)
+        entry: ./scripts/uv-run-with-index.sh run --group dev --group type_checking mypy
+        language: system
+        pass_filenames: false
+        stages: [manual]
       - id: distro-codegen
         name: Distribution Template Codegen
         additional_dependencies:
           - uv==0.7.8
-        entry: uv run --group codegen ./scripts/distro_codegen.py
+        entry: ./scripts/uv-run-with-index.sh run --group codegen ./scripts/distro_codegen.py
         language: python
         pass_filenames: false
         require_serial: true
@@ -91,7 +102,7 @@ repos:
         name: Provider Codegen
         additional_dependencies:
           - uv==0.7.8
-        entry: uv run --group codegen ./scripts/provider_codegen.py
+        entry: ./scripts/uv-run-with-index.sh run --group codegen ./scripts/provider_codegen.py
         language: python
         pass_filenames: false
         require_serial: true
@@ -100,7 +111,7 @@ repos:
         name: API Spec Codegen
         additional_dependencies:
           - uv==0.7.8
-        entry: sh -c 'uv run ./docs/openapi_generator/run_openapi_generator.sh > /dev/null'
+        entry: sh -c './scripts/uv-run-with-index.sh run ./docs/openapi_generator/run_openapi_generator.sh > /dev/null'
         language: python
         pass_filenames: false
         require_serial: true
@@ -141,7 +152,7 @@ repos:
         name: Generate CI documentation
         additional_dependencies:
           - uv==0.7.8
-        entry: uv run ./scripts/gen-ci-docs.py
+        entry: ./scripts/uv-run-with-index.sh run ./scripts/gen-ci-docs.py
         language: python
         pass_filenames: false
         require_serial: true
@@ -172,6 +183,23 @@ repos:
             exit 1
           fi
           exit 0
+      - id: fips-compliance
+        name: Ensure llama-stack remains FIPS compliant
+        entry: bash
+        language: system
+        types: [python]
+        pass_filenames: true
+        exclude: '^tests/.*$'  # Exclude test dir as some safety tests used MD5
+        args:
+          - -c
+          - |
+            grep -EnH '^[^#]*\b(md5|sha1|uuid3|uuid5)\b' "$@" && {
+              echo;
+              echo "❌ Do not use any of the following functions: hashlib.md5, hashlib.sha1, uuid.uuid3, uuid.uuid5"
+              echo "   These functions are not FIPS-compliant"
+              echo;
+              exit 1;
+            } || true

 ci:
   autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks
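The fips-compliance hook is a grep over non-comment lines; what it flags can be reproduced with a throwaway file (the sample below is hypothetical):

```bash
cat > /tmp/sample.py <<'EOF'
import hashlib
weak = hashlib.md5(b"data").hexdigest()   # flagged: md5
ok = hashlib.sha256(b"data").hexdigest()  # not flagged
EOF

# Same pattern the hook uses; a match means a non-FIPS primitive was found.
grep -EnH '^[^#]*\b(md5|sha1|uuid3|uuid5)\b' /tmp/sample.py \
  && echo "non-FIPS usage detected" || echo "clean"
```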


@@ -61,6 +61,18 @@ uv run pre-commit run --all-files -v
 The `-v` (verbose) parameter is optional but often helpful for getting more information about any issues that the pre-commit checks identify.

+To run the expanded mypy configuration that CI enforces, use:
+
+```bash
+uv run pre-commit run mypy-full --hook-stage manual --all-files
+```
+
+or invoke mypy directly with all optional dependencies:
+
+```bash
+uv run --group dev --group type_checking mypy
+```
+
 ```{caution}
 Before pushing your changes, make sure that the pre-commit hooks have passed successfully.
 ```


@@ -1,610 +0,0 @@
# yaml-language-server: $schema=https://app.stainlessapi.com/config-internal.schema.json

organization:
  # Name of your organization or company, used to determine the name of the client
  # and headings.
  name: llama-stack-client
  docs: https://llama-stack.readthedocs.io/en/latest/
  contact: llamastack@meta.com

security:
  - {}
  - BearerAuth: []
security_schemes:
  BearerAuth:
    type: http
    scheme: bearer

# `targets` define the output targets and their customization options, such as
# whether to emit the Node SDK and what its package name should be.
targets:
  node:
    package_name: llama-stack-client
    production_repo: llamastack/llama-stack-client-typescript
    publish:
      npm: false
  python:
    package_name: llama_stack_client
    production_repo: llamastack/llama-stack-client-python
    options:
      use_uv: true
    publish:
      pypi: true
    project_name: llama_stack_client
  kotlin:
    reverse_domain: com.llama_stack_client.api
    production_repo: null
    publish:
      maven: false
  go:
    package_name: llama-stack-client
    production_repo: llamastack/llama-stack-client-go
    options:
      enable_v2: true
      back_compat_use_shared_package: false

# `client_settings` define settings for the API client, such as extra constructor
# arguments (used for authentication), retry behavior, idempotency, etc.
client_settings:
  default_env_prefix: LLAMA_STACK_CLIENT
  opts:
    api_key:
      type: string
      read_env: LLAMA_STACK_CLIENT_API_KEY
      auth: { security_scheme: BearerAuth }
      nullable: true

# `environments` are a map of the name of the environment (e.g. "sandbox",
# "production") to the corresponding url to use.
environments:
  production: http://any-hosted-llama-stack.com

# `pagination` defines [pagination schemes] which provide a template to match
# endpoints and generate next-page and auto-pagination helpers in the SDKs.
pagination:
  - name: datasets_iterrows
    type: offset
    request:
      dataset_id:
        type: string
      start_index:
        type: integer
        x-stainless-pagination-property:
          purpose: offset_count_param
      limit:
        type: integer
    response:
      data:
        type: array
        items:
          type: object
      next_index:
        type: integer
        x-stainless-pagination-property:
          purpose: offset_count_start_field
  - name: openai_cursor_page
    type: cursor
    request:
      limit:
        type: integer
      after:
        type: string
        x-stainless-pagination-property:
          purpose: next_cursor_param
    response:
      data:
        type: array
        items: {}
      has_more:
        type: boolean
      last_id:
        type: string
        x-stainless-pagination-property:
          purpose: next_cursor_field

# `resources` define the structure and organization for your API, such as how
# methods and models are grouped together and accessed. See the [configuration
# guide] for more information.
#
# [configuration guide]:
# https://app.stainlessapi.com/docs/guides/configure#resources
resources:
  $shared:
    models:
      agent_config: AgentConfig
      interleaved_content_item: InterleavedContentItem
      interleaved_content: InterleavedContent
      param_type: ParamType
      safety_violation: SafetyViolation
      sampling_params: SamplingParams
      scoring_result: ScoringResult
      message: Message
      user_message: UserMessage
      completion_message: CompletionMessage
      tool_response_message: ToolResponseMessage
      system_message: SystemMessage
      tool_call: ToolCall
      query_result: RAGQueryResult
      document: RAGDocument
      query_config: RAGQueryConfig
      response_format: ResponseFormat
  toolgroups:
    models:
      tool_group: ToolGroup
      list_tool_groups_response: ListToolGroupsResponse
    methods:
      register: post /v1/toolgroups
      get: get /v1/toolgroups/{toolgroup_id}
      list: get /v1/toolgroups
      unregister: delete /v1/toolgroups/{toolgroup_id}
  tools:
    methods:
      get: get /v1/tools/{tool_name}
      list:
        endpoint: get /v1/tools
        paginated: false
  tool_runtime:
    models:
      tool_def: ToolDef
      tool_invocation_result: ToolInvocationResult
    methods:
      list_tools:
        endpoint: get /v1/tool-runtime/list-tools
        paginated: false
      invoke_tool: post /v1/tool-runtime/invoke
    subresources:
      rag_tool:
        methods:
          insert: post /v1/tool-runtime/rag-tool/insert
          query: post /v1/tool-runtime/rag-tool/query
  responses:
    models:
      response_object_stream: OpenAIResponseObjectStream
      response_object: OpenAIResponseObject
    methods:
      create:
        type: http
        endpoint: post /v1/responses
        streaming:
          stream_event_model: responses.response_object_stream
          param_discriminator: stream
      retrieve: get /v1/responses/{response_id}
      list:
        type: http
        endpoint: get /v1/responses
      delete:
        type: http
        endpoint: delete /v1/responses/{response_id}
    subresources:
      input_items:
        methods:
          list:
            type: http
            endpoint: get /v1/responses/{response_id}/input_items
  conversations:
    models:
      conversation_object: Conversation
    methods:
      create:
        type: http
        endpoint: post /v1/conversations
      retrieve: get /v1/conversations/{conversation_id}
      update:
        type: http
        endpoint: post /v1/conversations/{conversation_id}
      delete:
        type: http
        endpoint: delete /v1/conversations/{conversation_id}
    subresources:
      items:
        methods:
          get:
            type: http
            endpoint: get /v1/conversations/{conversation_id}/items/{item_id}
          list:
            type: http
            endpoint: get /v1/conversations/{conversation_id}/items
          create:
            type: http
            endpoint: post /v1/conversations/{conversation_id}/items
  inspect:
    models:
      healthInfo: HealthInfo
      providerInfo: ProviderInfo
      routeInfo: RouteInfo
      versionInfo: VersionInfo
    methods:
      health: get /v1/health
      version: get /v1/version
  embeddings:
    models:
      create_embeddings_response: OpenAIEmbeddingsResponse
    methods:
      create: post /v1/embeddings
  chat:
    models:
      chat_completion_chunk: OpenAIChatCompletionChunk
    subresources:
      completions:
        methods:
          create:
            type: http
            endpoint: post /v1/chat/completions
            streaming:
              stream_event_model: chat.chat_completion_chunk
              param_discriminator: stream
          list:
            type: http
            endpoint: get /v1/chat/completions
          retrieve:
            type: http
            endpoint: get /v1/chat/completions/{completion_id}
  completions:
    methods:
      create:
        type: http
        endpoint: post /v1/completions
        streaming:
          param_discriminator: stream
  vector_io:
    models:
      queryChunksResponse: QueryChunksResponse
    methods:
      insert: post /v1/vector-io/insert
      query: post /v1/vector-io/query
  vector_stores:
    models:
      vector_store: VectorStoreObject
      list_vector_stores_response: VectorStoreListResponse
      vector_store_delete_response: VectorStoreDeleteResponse
      vector_store_search_response: VectorStoreSearchResponsePage
    methods:
      create: post /v1/vector_stores
      list:
        endpoint: get /v1/vector_stores
      retrieve: get /v1/vector_stores/{vector_store_id}
      update: post /v1/vector_stores/{vector_store_id}
      delete: delete /v1/vector_stores/{vector_store_id}
      search: post /v1/vector_stores/{vector_store_id}/search
    subresources:
      files:
        models:
          vector_store_file: VectorStoreFileObject
        methods:
          list: get /v1/vector_stores/{vector_store_id}/files
          retrieve: get /v1/vector_stores/{vector_store_id}/files/{file_id}
          update: post /v1/vector_stores/{vector_store_id}/files/{file_id}
          delete: delete /v1/vector_stores/{vector_store_id}/files/{file_id}
          create: post /v1/vector_stores/{vector_store_id}/files
          content: get /v1/vector_stores/{vector_store_id}/files/{file_id}/content
      file_batches:
        models:
          vector_store_file_batches: VectorStoreFileBatchObject
          list_vector_store_files_in_batch_response: VectorStoreFilesListInBatchResponse
        methods:
          create: post /v1/vector_stores/{vector_store_id}/file_batches
          retrieve: get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}
          list_files: get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files
          cancel: post /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel
  models:
    models:
      model: Model
      list_models_response: ListModelsResponse
    methods:
      retrieve: get /v1/models/{model_id}
      list:
        endpoint: get /v1/models
        paginated: false
      register: post /v1/models
      unregister: delete /v1/models/{model_id}
    subresources:
      openai:
        methods:
          list:
            endpoint: get /v1/models
            paginated: false
  providers:
    models:
      list_providers_response: ListProvidersResponse
    methods:
      list:
        endpoint: get /v1/providers
        paginated: false
      retrieve: get /v1/providers/{provider_id}
  routes:
    models:
      list_routes_response: ListRoutesResponse
    methods:
      list:
        endpoint: get /v1/inspect/routes
        paginated: false
  moderations:
    models:
      create_response: ModerationObject
    methods:
      create: post /v1/moderations
  safety:
    models:
      run_shield_response: RunShieldResponse
    methods:
      run_shield: post /v1/safety/run-shield
  shields:
    models:
      shield: Shield
      list_shields_response: ListShieldsResponse
    methods:
      retrieve: get /v1/shields/{identifier}
      list:
        endpoint: get /v1/shields
        paginated: false
      register: post /v1/shields
      delete: delete /v1/shields/{identifier}
  synthetic_data_generation:
    models:
      syntheticDataGenerationResponse: SyntheticDataGenerationResponse
    methods:
      generate: post /v1/synthetic-data-generation/generate
  telemetry:
    models:
      span_with_status: SpanWithStatus
      trace: Trace
      query_spans_response: QuerySpansResponse
      event: Event
      query_condition: QueryCondition
    methods:
      query_traces:
        endpoint: post /v1alpha/telemetry/traces
skip_test_reason: 'unsupported query params in java / kotlin'
get_span_tree: post /v1alpha/telemetry/spans/{span_id}/tree
query_spans:
endpoint: post /v1alpha/telemetry/spans
skip_test_reason: 'unsupported query params in java / kotlin'
query_metrics:
endpoint: post /v1alpha/telemetry/metrics/{metric_name}
skip_test_reason: 'unsupported query params in java / kotlin'
# log_event: post /v1alpha/telemetry/events
save_spans_to_dataset: post /v1alpha/telemetry/spans/export
get_span: get /v1alpha/telemetry/traces/{trace_id}/spans/{span_id}
get_trace: get /v1alpha/telemetry/traces/{trace_id}
scoring:
methods:
score: post /v1/scoring/score
score_batch: post /v1/scoring/score-batch
scoring_functions:
methods:
retrieve: get /v1/scoring-functions/{scoring_fn_id}
list:
endpoint: get /v1/scoring-functions
paginated: false
register: post /v1/scoring-functions
models:
scoring_fn: ScoringFn
scoring_fn_params: ScoringFnParams
list_scoring_functions_response: ListScoringFunctionsResponse
benchmarks:
methods:
retrieve: get /v1alpha/eval/benchmarks/{benchmark_id}
list:
endpoint: get /v1alpha/eval/benchmarks
paginated: false
register: post /v1alpha/eval/benchmarks
models:
benchmark: Benchmark
list_benchmarks_response: ListBenchmarksResponse
files:
methods:
create: post /v1/files
list: get /v1/files
retrieve: get /v1/files/{file_id}
delete: delete /v1/files/{file_id}
content: get /v1/files/{file_id}/content
models:
file: OpenAIFileObject
list_files_response: ListOpenAIFileResponse
delete_file_response: OpenAIFileDeleteResponse
alpha:
subresources:
inference:
methods:
rerank: post /v1alpha/inference/rerank
post_training:
models:
algorithm_config: AlgorithmConfig
post_training_job: PostTrainingJob
list_post_training_jobs_response: ListPostTrainingJobsResponse
methods:
preference_optimize: post /v1alpha/post-training/preference-optimize
supervised_fine_tune: post /v1alpha/post-training/supervised-fine-tune
subresources:
job:
methods:
artifacts: get /v1alpha/post-training/job/artifacts
cancel: post /v1alpha/post-training/job/cancel
status: get /v1alpha/post-training/job/status
list:
endpoint: get /v1alpha/post-training/jobs
paginated: false
eval:
methods:
evaluate_rows: post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations
run_eval: post /v1alpha/eval/benchmarks/{benchmark_id}/jobs
evaluate_rows_alpha: post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations
run_eval_alpha: post /v1alpha/eval/benchmarks/{benchmark_id}/jobs
subresources:
jobs:
methods:
cancel: delete /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}
status: get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}
retrieve: get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result
models:
evaluate_response: EvaluateResponse
benchmark_config: BenchmarkConfig
job: Job
agents:
methods:
create: post /v1alpha/agents
list: get /v1alpha/agents
retrieve: get /v1alpha/agents/{agent_id}
delete: delete /v1alpha/agents/{agent_id}
models:
inference_step: InferenceStep
tool_execution_step: ToolExecutionStep
tool_response: ToolResponse
shield_call_step: ShieldCallStep
memory_retrieval_step: MemoryRetrievalStep
subresources:
session:
models:
session: Session
methods:
list: get /v1alpha/agents/{agent_id}/sessions
create: post /v1alpha/agents/{agent_id}/session
delete: delete /v1alpha/agents/{agent_id}/session/{session_id}
retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}
steps:
methods:
retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}
turn:
models:
turn: Turn
turn_response_event: AgentTurnResponseEvent
agent_turn_response_stream_chunk: AgentTurnResponseStreamChunk
methods:
create:
type: http
endpoint: post /v1alpha/agents/{agent_id}/session/{session_id}/turn
streaming:
stream_event_model: alpha.agents.turn.agent_turn_response_stream_chunk
param_discriminator: stream
retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}
resume:
type: http
endpoint: post /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume
streaming:
stream_event_model: alpha.agents.turn.agent_turn_response_stream_chunk
param_discriminator: stream
beta:
subresources:
datasets:
models:
list_datasets_response: ListDatasetsResponse
methods:
register: post /v1beta/datasets
retrieve: get /v1beta/datasets/{dataset_id}
list:
endpoint: get /v1beta/datasets
paginated: false
unregister: delete /v1beta/datasets/{dataset_id}
iterrows: get /v1beta/datasetio/iterrows/{dataset_id}
appendrows: post /v1beta/datasetio/append-rows/{dataset_id}
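
To make the mapping concrete, here is a hedged sketch of how a few entries above surface as Python client calls; names are inferred from the resource map and exact signatures are assumptions:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

# `toolgroups` -> client.toolgroups; its `methods.list` -> .list()
for group in client.toolgroups.list():
    print(group.identifier)

# `subresources` nest namespaces: `tool_runtime.rag_tool` becomes
# client.tool_runtime.rag_tool, exposing .insert() and .query().

# `streaming` entries add a `stream` discriminator parameter; events follow
# the configured stream_event_model (responses.response_object_stream here).
stream = client.responses.create(model="my-model", input="hello", stream=True)
for event in stream:
    print(event)
```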
settings:
license: MIT
unwrap_response_fields: [ data ]
openapi:
transformations:
- command: renameValue
reason: pydantic reserved name
args:
filter:
only:
- '$.components.schemas.InferenceStep.properties.model_response'
rename:
python:
property_name: 'inference_model_response'
# - command: renameValue
# reason: pydantic reserved name
# args:
# filter:
# only:
# - '$.components.schemas.Model.properties.model_type'
# rename:
# python:
# property_name: 'type'
- command: mergeObject
reason: Better return_type using enum
args:
target:
- '$.components.schemas'
object:
ReturnType:
additionalProperties: false
properties:
type:
enum:
- string
- number
- boolean
- array
- object
- json
- union
- chat_completion_input
- completion_input
- agent_turn_input
required:
- type
type: object
- command: replaceProperties
reason: Replace return type properties with better model (see above)
args:
filter:
only:
- '$.components.schemas.ScoringFn.properties.return_type'
- '$.components.schemas.RegisterScoringFunctionRequest.properties.return_type'
value:
$ref: '#/components/schemas/ReturnType'
- command: oneOfToAnyOf
reason: Prism (mock server) doesn't like one of our requests as it technically matches multiple variants
- reason: For better names
command: extractToRefs
args:
ref:
target: '$.components.schemas.ToolCallDelta.properties.tool_call'
name: '#/components/schemas/ToolCallOrString'
# `readme` is used to configure the code snippets that will be rendered in the
# README.md of various SDKs. In particular, you can change the `headline`
# snippet's endpoint and the arguments to call it with.
readme:
example_requests:
default:
type: request
endpoint: post /v1/chat/completions
params: &ref_0 {}
headline:
type: request
endpoint: post /v1/models
params: *ref_0
pagination:
type: request
endpoint: post /v1/chat/completions
params: {}
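
Rendered, the `headline` entry above yields the lead snippet of each SDK README. Roughly, for Python (a sketch; `params` is empty, so the argument shown is illustrative only):

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient()

# headline endpoint `post /v1/models` maps to models.register
model = client.models.register(model_id="my-model")  # illustrative argument
print(model.identifier)
```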

File diff suppressed because it is too large

@ -19,6 +19,8 @@ ARG KEEP_WORKSPACE=""
 ARG DISTRO_NAME="starter"
 ARG RUN_CONFIG_PATH=""
 ARG UV_HTTP_TIMEOUT=500
+ARG UV_EXTRA_INDEX_URL=""
+ARG UV_INDEX_STRATEGY=""
 ENV UV_HTTP_TIMEOUT=${UV_HTTP_TIMEOUT}
 ENV PYTHONDONTWRITEBYTECODE=1
 ENV PIP_DISABLE_PIP_VERSION_CHECK=1
@ -45,7 +47,7 @@ RUN set -eux; \
         exit 1; \
     fi
 
-RUN pip install --no-cache uv
+RUN pip install --no-cache-dir uv
 
 ENV UV_SYSTEM_PYTHON=1
 ENV INSTALL_MODE=${INSTALL_MODE}
@ -62,47 +64,60 @@ COPY . /workspace
 # Install the client package if it is provided
 # NOTE: this is installed before llama-stack since llama-stack depends on llama-stack-client-python
+# Unset UV index env vars to ensure we only use PyPI for the client
 RUN set -eux; \
+    unset UV_EXTRA_INDEX_URL UV_INDEX_STRATEGY; \
     if [ -n "$LLAMA_STACK_CLIENT_DIR" ]; then \
         if [ ! -d "$LLAMA_STACK_CLIENT_DIR" ]; then \
            echo "LLAMA_STACK_CLIENT_DIR is set but $LLAMA_STACK_CLIENT_DIR does not exist" >&2; \
            exit 1; \
         fi; \
-        uv pip install --no-cache -e "$LLAMA_STACK_CLIENT_DIR"; \
+        uv pip install --no-cache-dir -e "$LLAMA_STACK_CLIENT_DIR"; \
     fi;
 
 # Install llama-stack
+# Use UV_EXTRA_INDEX_URL inline only for editable install with RC dependencies
 RUN set -eux; \
+    SAVED_UV_EXTRA_INDEX_URL="${UV_EXTRA_INDEX_URL:-}"; \
+    SAVED_UV_INDEX_STRATEGY="${UV_INDEX_STRATEGY:-}"; \
+    unset UV_EXTRA_INDEX_URL UV_INDEX_STRATEGY; \
     if [ "$INSTALL_MODE" = "editable" ]; then \
         if [ ! -d "$LLAMA_STACK_DIR" ]; then \
            echo "INSTALL_MODE=editable requires LLAMA_STACK_DIR to point to a directory inside the build context" >&2; \
            exit 1; \
         fi; \
-        uv pip install --no-cache -e "$LLAMA_STACK_DIR"; \
+        if [ -n "$SAVED_UV_EXTRA_INDEX_URL" ] && [ -n "$SAVED_UV_INDEX_STRATEGY" ]; then \
+            UV_EXTRA_INDEX_URL="$SAVED_UV_EXTRA_INDEX_URL" UV_INDEX_STRATEGY="$SAVED_UV_INDEX_STRATEGY" \
+                uv pip install --no-cache-dir -e "$LLAMA_STACK_DIR"; \
+        else \
+            uv pip install --no-cache-dir -e "$LLAMA_STACK_DIR"; \
+        fi; \
     elif [ "$INSTALL_MODE" = "test-pypi" ]; then \
-        uv pip install --no-cache fastapi libcst; \
+        uv pip install --no-cache-dir fastapi libcst; \
         if [ -n "$TEST_PYPI_VERSION" ]; then \
-            uv pip install --no-cache --extra-index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match "llama-stack==$TEST_PYPI_VERSION"; \
+            uv pip install --no-cache-dir --extra-index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match "llama-stack==$TEST_PYPI_VERSION"; \
         else \
-            uv pip install --no-cache --extra-index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match llama-stack; \
+            uv pip install --no-cache-dir --extra-index-url https://test.pypi.org/simple/ --index-strategy unsafe-best-match llama-stack; \
         fi; \
     else \
         if [ -n "$PYPI_VERSION" ]; then \
-            uv pip install --no-cache "llama-stack==$PYPI_VERSION"; \
+            uv pip install --no-cache-dir "llama-stack==$PYPI_VERSION"; \
         else \
-            uv pip install --no-cache llama-stack; \
+            uv pip install --no-cache-dir llama-stack; \
         fi; \
     fi;
 
 # Install the dependencies for the distribution
+# Explicitly unset UV index env vars to ensure we only use PyPI for distribution deps
 RUN set -eux; \
+    unset UV_EXTRA_INDEX_URL UV_INDEX_STRATEGY; \
     if [ -z "$DISTRO_NAME" ]; then \
        echo "DISTRO_NAME must be provided" >&2; \
        exit 1; \
     fi; \
     deps="$(llama stack list-deps "$DISTRO_NAME")"; \
     if [ -n "$deps" ]; then \
-        printf '%s\n' "$deps" | xargs -L1 uv pip install --no-cache; \
+        printf '%s\n' "$deps" | xargs -L1 uv pip install --no-cache-dir; \
     fi
 
 # Cleanup


@ -23,5 +23,4 @@ A Llama Stack API is described as a collection of REST endpoints. We currently s
 We are working on adding a few more APIs to complete the application lifecycle. These will include:
 - **Batch Inference**: run inference on a dataset of inputs
 - **Batch Agents**: run agents on a dataset of inputs
-- **Synthetic Data Generation**: generate synthetic data for model development
 - **Batches**: OpenAI-compatible batch management for inference


@ -79,6 +79,33 @@ docker run \
   --port $LLAMA_STACK_PORT
 ```
 
+### Via Docker with Custom Run Configuration
+
+You can also run the Docker container with a custom run configuration file by mounting it into the container:
+
+```bash
+# Set the path to your custom run.yaml file
+CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
+LLAMA_STACK_PORT=8321
+docker run \
+  -it \
+  --pull always \
+  --gpus all \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
+  -v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
+  -e RUN_CONFIG_PATH=/app/custom-run.yaml \
+  llamastack/distribution-meta-reference-gpu \
+  --port $LLAMA_STACK_PORT
+```
+
+**Note**: The run configuration must be mounted into the container before it can be used. The `-v` flag mounts your local file into the container, and the `RUN_CONFIG_PATH` environment variable tells the entrypoint script which configuration to use.
+
+Available run configurations for this distribution:
+- `run.yaml`
+- `run-with-safety.yaml`
+
 ### Via venv
 
 Make sure you have the Llama Stack CLI available.


@ -127,13 +127,39 @@ docker run \
   -it \
   --pull always \
   -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run.yaml:/root/my-run.yaml \
+  -v ~/.llama:/root/.llama \
   -e NVIDIA_API_KEY=$NVIDIA_API_KEY \
   llamastack/distribution-nvidia \
-  --config /root/my-run.yaml \
   --port $LLAMA_STACK_PORT
 ```
 
+### Via Docker with Custom Run Configuration
+
+You can also run the Docker container with a custom run configuration file by mounting it into the container:
+
+```bash
+# Set the path to your custom run.yaml file
+CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
+LLAMA_STACK_PORT=8321
+docker run \
+  -it \
+  --pull always \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
+  -v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
+  -e RUN_CONFIG_PATH=/app/custom-run.yaml \
+  -e NVIDIA_API_KEY=$NVIDIA_API_KEY \
+  llamastack/distribution-nvidia \
+  --port $LLAMA_STACK_PORT
+```
+
+**Note**: The run configuration must be mounted into the container before it can be used. The `-v` flag mounts your local file into the container, and the `RUN_CONFIG_PATH` environment variable tells the entrypoint script which configuration to use.
+
+Available run configurations for this distribution:
+- `run.yaml`
+- `run-with-safety.yaml`
+
 ### Via venv
 
 If you've set up your local development environment, you can also install the distribution dependencies using your local virtual environment.


@ -239,8 +239,13 @@ client = LlamaStackClient(base_url="http://localhost:8321")
 models = client.models.list()
 
 # Select the first LLM
-llm = next(m for m in models if m.model_type == "llm" and m.provider_id == "ollama")
-model_id = llm.identifier
+llm = next(
+    m for m in models
+    if m.custom_metadata
+    and m.custom_metadata.get("model_type") == "llm"
+    and m.custom_metadata.get("provider_id") == "ollama"
+)
+model_id = llm.id
 
 print("Model:", model_id)
@ -279,8 +284,13 @@ import uuid
 client = LlamaStackClient(base_url=f"http://localhost:8321")
 
 models = client.models.list()
-llm = next(m for m in models if m.model_type == "llm" and m.provider_id == "ollama")
-model_id = llm.identifier
+llm = next(
+    m for m in models
+    if m.custom_metadata
+    and m.custom_metadata.get("model_type") == "llm"
+    and m.custom_metadata.get("provider_id") == "ollama"
+)
+model_id = llm.id
 
 agent = Agent(client, model=model_id, instructions="You are a helpful assistant.")
@ -450,8 +460,11 @@ import uuid
 client = LlamaStackClient(base_url="http://localhost:8321")
 
 # Create a vector database instance
-embed_lm = next(m for m in client.models.list() if m.model_type == "embedding")
-embedding_model = embed_lm.identifier
+embed_lm = next(
+    m for m in client.models.list()
+    if m.custom_metadata and m.custom_metadata.get("model_type") == "embedding"
+)
+embedding_model = embed_lm.id
 vector_db_id = f"v{uuid.uuid4().hex}"
 # The VectorDB API is deprecated; the server now returns its own authoritative ID.
 # We capture the correct ID from the response's .identifier attribute.
@ -489,9 +502,11 @@ client.tool_runtime.rag_tool.insert(
 llm = next(
     m
     for m in client.models.list()
-    if m.model_type == "llm" and m.provider_id == "ollama"
+    if m.custom_metadata
+    and m.custom_metadata.get("model_type") == "llm"
+    and m.custom_metadata.get("provider_id") == "ollama"
 )
-model = llm.identifier
+model = llm.id
 
 # Create the RAG agent
 rag_agent = Agent(
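
Since the same `custom_metadata` lookup now repeats across these snippets, a small helper keeps call sites short (hypothetical convenience code, not part of the docs being changed):

```python
def find_model(client, *, model_type: str, provider_id: str | None = None):
    """Return the first model whose custom_metadata matches the filters."""
    for m in client.models.list():
        meta = m.custom_metadata or {}
        if meta.get("model_type") != model_type:
            continue
        if provider_id is not None and meta.get("provider_id") != provider_id:
            continue
        return m
    raise LookupError(f"no matching {model_type} model found")

# Usage: model_id = find_model(client, model_type="llm", provider_id="ollama").id
```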


@ -0,0 +1,27 @@
---
description: "OpenAI Files API provider for managing files through OpenAI's native file storage service."
sidebar_label: Remote - Openai
title: remote::openai
---
# remote::openai
## Description
OpenAI Files API provider for managing files through OpenAI's native file storage service.
## Configuration
| Field | Type | Required | Default | Description |
|-------|------|----------|---------|-------------|
| `api_key` | `<class 'str'>` | No | | OpenAI API key for authentication |
| `metadata_store` | `<class 'llama_stack.core.storage.datatypes.SqlStoreReference'>` | No | | SQL store configuration for file metadata |
## Sample Configuration
```yaml
api_key: ${env.OPENAI_API_KEY}
metadata_store:
table_name: openai_files_metadata
backend: sql_default
```
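
Once configured, the provider sits behind the standard OpenAI-compatible Files endpoints, so client usage looks roughly like this (a sketch; the `purpose` value is an assumption):

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

# post /v1/files — the upload is stored via the configured OpenAI backend
with open("notes.txt", "rb") as f:
    uploaded = client.files.create(file=f, purpose="assistants")

print(uploaded.id)  # retrieve/content/delete hang off the same namespace
```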


@ -20,6 +20,7 @@ NVIDIA inference provider for accessing NVIDIA NIM models and AI services.
 | `url` | `<class 'str'>` | No | https://integrate.api.nvidia.com | A base url for accessing the NVIDIA NIM |
 | `timeout` | `<class 'int'>` | No | 60 | Timeout for the HTTP requests |
 | `append_api_version` | `<class 'bool'>` | No | True | When set to false, the API version will not be appended to the base_url. By default, it is true. |
+| `rerank_model_to_url` | `dict[str, str]` | No | `{'nv-rerank-qa-mistral-4b:1': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking', 'nvidia/nv-rerankqa-mistral-4b-v3': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking', 'nvidia/llama-3.2-nv-rerankqa-1b-v2': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking'}` | Mapping of rerank model identifiers to their API endpoints. |
 
 ## Sample Configuration
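
The new field drives endpoint selection for reranking; invoking it through the client looks roughly like this (a sketch — parameter names are assumptions based on the alpha rerank endpoint):

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

# post /v1alpha/inference/rerank — the model id is looked up in
# rerank_model_to_url (falling back to the default mapping above)
result = client.alpha.inference.rerank(
    model="nvidia/llama-3.2-nv-rerankqa-1b-v2",
    query="What is Llama Stack?",
    items=["Llama Stack is an AI framework.", "Unrelated text."],
)
```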

File diff suppressed because it is too large

@ -84,7 +84,6 @@ def generate_spec(output_dir: Path, stability_filter: str = None, main_spec: boo
     )
 
     yaml_filename = f"{filename_prefix}llama-stack-spec.yaml"
-    html_filename = f"{filename_prefix}llama-stack-spec.html"
 
     with open(output_dir / yaml_filename, "w", encoding="utf-8") as fp:
         y = yaml.YAML()
@ -102,11 +101,6 @@
             fp,
         )
 
-    with open(output_dir / html_filename, "w") as fp:
-        spec.write_html(fp, pretty_print=True)
-
-    print(f"Generated {yaml_filename} and {html_filename}")
-
 def main(output_dir: str):
     output_dir = Path(output_dir)
     if not output_dir.exists():


@ -242,15 +242,6 @@ const sidebars: SidebarsConfig = {
         'providers/eval/remote_nvidia'
       ],
     },
-    {
-      type: 'category',
-      label: 'Telemetry',
-      collapsed: true,
-      items: [
-        'providers/telemetry/index',
-        'providers/telemetry/inline_meta-reference'
-      ],
-    },
     {
       type: 'category',
       label: 'Batches',

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

@ -1,59 +0,0 @@
version: 2
distribution_spec:
description: CI tests for Llama Stack
providers:
inference:
- provider_type: remote::cerebras
- provider_type: remote::ollama
- provider_type: remote::vllm
- provider_type: remote::tgi
- provider_type: remote::fireworks
- provider_type: remote::together
- provider_type: remote::bedrock
- provider_type: remote::nvidia
- provider_type: remote::openai
- provider_type: remote::anthropic
- provider_type: remote::gemini
- provider_type: remote::vertexai
- provider_type: remote::groq
- provider_type: remote::sambanova
- provider_type: remote::azure
- provider_type: inline::sentence-transformers
vector_io:
- provider_type: inline::faiss
- provider_type: inline::sqlite-vec
- provider_type: inline::milvus
- provider_type: remote::chromadb
- provider_type: remote::pgvector
- provider_type: remote::qdrant
- provider_type: remote::weaviate
files:
- provider_type: inline::localfs
safety:
- provider_type: inline::llama-guard
- provider_type: inline::code-scanner
agents:
- provider_type: inline::meta-reference
post_training:
- provider_type: inline::torchtune-cpu
eval:
- provider_type: inline::meta-reference
datasetio:
- provider_type: remote::huggingface
- provider_type: inline::localfs
scoring:
- provider_type: inline::basic
- provider_type: inline::llm-as-judge
- provider_type: inline::braintrust
tool_runtime:
- provider_type: remote::brave-search
- provider_type: remote::tavily-search
- provider_type: inline::rag-runtime
- provider_type: remote::model-context-protocol
batches:
- provider_type: inline::reference
image_type: venv
additional_pip_packages:
- aiosqlite
- asyncpg
- sqlalchemy[asyncio]


@ -1,281 +0,0 @@
version: 2
image_name: ci-tests
apis:
- agents
- batches
- datasetio
- eval
- files
- inference
- post_training
- safety
- scoring
- tool_runtime
- vector_io
providers:
inference:
- provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
provider_type: remote::cerebras
config:
base_url: https://api.cerebras.ai
api_key: ${env.CEREBRAS_API_KEY:=}
- provider_id: ${env.OLLAMA_URL:+ollama}
provider_type: remote::ollama
config:
url: ${env.OLLAMA_URL:=http://localhost:11434}
- provider_id: ${env.VLLM_URL:+vllm}
provider_type: remote::vllm
config:
url: ${env.VLLM_URL:=}
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
- provider_id: ${env.TGI_URL:+tgi}
provider_type: remote::tgi
config:
url: ${env.TGI_URL:=}
- provider_id: fireworks
provider_type: remote::fireworks
config:
url: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY:=}
- provider_id: together
provider_type: remote::together
config:
url: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY:=}
- provider_id: bedrock
provider_type: remote::bedrock
- provider_id: ${env.NVIDIA_API_KEY:+nvidia}
provider_type: remote::nvidia
config:
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
api_key: ${env.NVIDIA_API_KEY:=}
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
- provider_id: openai
provider_type: remote::openai
config:
api_key: ${env.OPENAI_API_KEY:=}
base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
- provider_id: anthropic
provider_type: remote::anthropic
config:
api_key: ${env.ANTHROPIC_API_KEY:=}
- provider_id: gemini
provider_type: remote::gemini
config:
api_key: ${env.GEMINI_API_KEY:=}
- provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
provider_type: remote::vertexai
config:
project: ${env.VERTEX_AI_PROJECT:=}
location: ${env.VERTEX_AI_LOCATION:=us-central1}
- provider_id: groq
provider_type: remote::groq
config:
url: https://api.groq.com
api_key: ${env.GROQ_API_KEY:=}
- provider_id: sambanova
provider_type: remote::sambanova
config:
url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY:=}
- provider_id: ${env.AZURE_API_KEY:+azure}
provider_type: remote::azure
config:
api_key: ${env.AZURE_API_KEY:=}
api_base: ${env.AZURE_API_BASE:=}
api_version: ${env.AZURE_API_VERSION:=}
api_type: ${env.AZURE_API_TYPE:=}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
vector_io:
- provider_id: faiss
provider_type: inline::faiss
config:
persistence:
namespace: vector_io::faiss
backend: kv_default
- provider_id: sqlite-vec
provider_type: inline::sqlite-vec
config:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sqlite_vec.db
persistence:
namespace: vector_io::sqlite_vec
backend: kv_default
- provider_id: ${env.MILVUS_URL:+milvus}
provider_type: inline::milvus
config:
db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/ci-tests}/milvus.db
persistence:
namespace: vector_io::milvus
backend: kv_default
- provider_id: ${env.CHROMADB_URL:+chromadb}
provider_type: remote::chromadb
config:
url: ${env.CHROMADB_URL:=}
persistence:
namespace: vector_io::chroma_remote
backend: kv_default
- provider_id: ${env.PGVECTOR_DB:+pgvector}
provider_type: remote::pgvector
config:
host: ${env.PGVECTOR_HOST:=localhost}
port: ${env.PGVECTOR_PORT:=5432}
db: ${env.PGVECTOR_DB:=}
user: ${env.PGVECTOR_USER:=}
password: ${env.PGVECTOR_PASSWORD:=}
persistence:
namespace: vector_io::pgvector
backend: kv_default
- provider_id: ${env.QDRANT_URL:+qdrant}
provider_type: remote::qdrant
config:
api_key: ${env.QDRANT_API_KEY:=}
persistence:
namespace: vector_io::qdrant_remote
backend: kv_default
- provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
provider_type: remote::weaviate
config:
weaviate_api_key: null
weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
persistence:
namespace: vector_io::weaviate
backend: kv_default
files:
- provider_id: meta-reference-files
provider_type: inline::localfs
config:
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/ci-tests/files}
metadata_store:
table_name: files_metadata
backend: sql_default
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config:
excluded_categories: []
- provider_id: code-scanner
provider_type: inline::code-scanner
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence:
agent_state:
namespace: agents
backend: kv_default
responses:
table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
post_training:
- provider_id: torchtune-cpu
provider_type: inline::torchtune-cpu
config:
checkpoint_format: meta
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
kvstore:
namespace: eval
backend: kv_default
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config:
kvstore:
namespace: datasetio::huggingface
backend: kv_default
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
namespace: datasetio::localfs
backend: kv_default
scoring:
- provider_id: basic
provider_type: inline::basic
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
- provider_id: braintrust
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:=}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:=}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:=}
max_results: 3
- provider_id: rag-runtime
provider_type: inline::rag-runtime
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
batches:
- provider_id: reference
provider_type: inline::reference
config:
kvstore:
namespace: batches
backend: kv_default
storage:
backends:
kv_default:
type: kv_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/kvstore.db
sql_default:
type: sql_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ci-tests}/sql_store.db
stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
prompts:
namespace: prompts
backend: kv_default
registered_resources:
models: []
shields:
- shield_id: llama-guard
provider_id: ${env.SAFETY_MODEL:+llama-guard}
provider_shield_id: ${env.SAFETY_MODEL:=}
- shield_id: code-scanner
provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
server:
port: 8321
telemetry:
enabled: true
vector_stores:
default_provider_id: faiss
default_embedding_model:
provider_id: sentence-transformers
model_id: nomic-ai/nomic-embed-text-v1.5
safety:
default_shield_id: llama-guard


@ -1,33 +0,0 @@
version: 2
distribution_spec:
description: Dell's distribution of Llama Stack. TGI inference via Dell's custom
container
providers:
inference:
- provider_type: remote::tgi
- provider_type: inline::sentence-transformers
vector_io:
- provider_type: inline::faiss
- provider_type: remote::chromadb
- provider_type: remote::pgvector
safety:
- provider_type: inline::llama-guard
agents:
- provider_type: inline::meta-reference
eval:
- provider_type: inline::meta-reference
datasetio:
- provider_type: remote::huggingface
- provider_type: inline::localfs
scoring:
- provider_type: inline::basic
- provider_type: inline::llm-as-judge
- provider_type: inline::braintrust
tool_runtime:
- provider_type: remote::brave-search
- provider_type: remote::tavily-search
- provider_type: inline::rag-runtime
image_type: venv
additional_pip_packages:
- aiosqlite
- sqlalchemy[asyncio]


@ -1,144 +0,0 @@
version: 2
image_name: dell
apis:
- agents
- datasetio
- eval
- inference
- safety
- scoring
- tool_runtime
- vector_io
providers:
inference:
- provider_id: tgi0
provider_type: remote::tgi
config:
url: ${env.DEH_URL}
- provider_id: tgi1
provider_type: remote::tgi
config:
url: ${env.DEH_SAFETY_URL}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
vector_io:
- provider_id: chromadb
provider_type: remote::chromadb
config:
url: ${env.CHROMADB_URL:=}
persistence:
namespace: vector_io::chroma_remote
backend: kv_default
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config:
excluded_categories: []
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence:
agent_state:
namespace: agents
backend: kv_default
responses:
table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
kvstore:
namespace: eval
backend: kv_default
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config:
kvstore:
namespace: datasetio::huggingface
backend: kv_default
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
namespace: datasetio::localfs
backend: kv_default
scoring:
- provider_id: basic
provider_type: inline::basic
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
- provider_id: braintrust
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:=}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:=}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:=}
max_results: 3
- provider_id: rag-runtime
provider_type: inline::rag-runtime
storage:
backends:
kv_default:
type: kv_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/kvstore.db
sql_default:
type: sql_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/sql_store.db
stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
prompts:
namespace: prompts
backend: kv_default
registered_resources:
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: tgi0
model_type: llm
- metadata: {}
model_id: ${env.SAFETY_MODEL}
provider_id: tgi1
model_type: llm
- metadata:
embedding_dimension: 768
model_id: nomic-embed-text-v1.5
provider_id: sentence-transformers
model_type: embedding
shields:
- shield_id: ${env.SAFETY_MODEL}
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: brave-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
server:
port: 8321
telemetry:
enabled: true


@ -1,135 +0,0 @@
version: 2
image_name: dell
apis:
- agents
- datasetio
- eval
- inference
- safety
- scoring
- tool_runtime
- vector_io
providers:
inference:
- provider_id: tgi0
provider_type: remote::tgi
config:
url: ${env.DEH_URL}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
vector_io:
- provider_id: chromadb
provider_type: remote::chromadb
config:
url: ${env.CHROMADB_URL:=}
persistence:
namespace: vector_io::chroma_remote
backend: kv_default
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config:
excluded_categories: []
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence:
agent_state:
namespace: agents
backend: kv_default
responses:
table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
kvstore:
namespace: eval
backend: kv_default
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config:
kvstore:
namespace: datasetio::huggingface
backend: kv_default
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
namespace: datasetio::localfs
backend: kv_default
scoring:
- provider_id: basic
provider_type: inline::basic
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
- provider_id: braintrust
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:=}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:=}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:=}
max_results: 3
- provider_id: rag-runtime
provider_type: inline::rag-runtime
storage:
backends:
kv_default:
type: kv_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/kvstore.db
sql_default:
type: sql_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/dell}/sql_store.db
stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
prompts:
namespace: prompts
backend: kv_default
registered_resources:
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: tgi0
model_type: llm
- metadata:
embedding_dimension: 768
model_id: nomic-embed-text-v1.5
provider_id: sentence-transformers
model_type: embedding
shields: []
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: brave-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
server:
port: 8321
telemetry:
enabled: true


@ -1,32 +0,0 @@
version: 2
distribution_spec:
description: Use Meta Reference for running LLM inference
providers:
inference:
- provider_type: inline::meta-reference
vector_io:
- provider_type: inline::faiss
- provider_type: remote::chromadb
- provider_type: remote::pgvector
safety:
- provider_type: inline::llama-guard
agents:
- provider_type: inline::meta-reference
eval:
- provider_type: inline::meta-reference
datasetio:
- provider_type: remote::huggingface
- provider_type: inline::localfs
scoring:
- provider_type: inline::basic
- provider_type: inline::llm-as-judge
- provider_type: inline::braintrust
tool_runtime:
- provider_type: remote::brave-search
- provider_type: remote::tavily-search
- provider_type: inline::rag-runtime
- provider_type: remote::model-context-protocol
image_type: venv
additional_pip_packages:
- aiosqlite
- sqlalchemy[asyncio]


@ -1,157 +0,0 @@
version: 2
image_name: meta-reference-gpu
apis:
- agents
- datasetio
- eval
- inference
- safety
- scoring
- tool_runtime
- vector_io
providers:
inference:
- provider_id: meta-reference-inference
provider_type: inline::meta-reference
config:
model: ${env.INFERENCE_MODEL}
checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:=null}
quantization:
type: ${env.QUANTIZATION_TYPE:=bf16}
model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0}
max_batch_size: ${env.MAX_BATCH_SIZE:=1}
max_seq_len: ${env.MAX_SEQ_LEN:=4096}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
- provider_id: meta-reference-safety
provider_type: inline::meta-reference
config:
model: ${env.SAFETY_MODEL}
checkpoint_dir: ${env.SAFETY_CHECKPOINT_DIR:=null}
quantization:
type: ${env.QUANTIZATION_TYPE:=bf16}
model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0}
max_batch_size: ${env.MAX_BATCH_SIZE:=1}
max_seq_len: ${env.MAX_SEQ_LEN:=4096}
vector_io:
- provider_id: faiss
provider_type: inline::faiss
config:
persistence:
namespace: vector_io::faiss
backend: kv_default
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config:
excluded_categories: []
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence:
agent_state:
namespace: agents
backend: kv_default
responses:
table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
kvstore:
namespace: eval
backend: kv_default
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config:
kvstore:
namespace: datasetio::huggingface
backend: kv_default
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
namespace: datasetio::localfs
backend: kv_default
scoring:
- provider_id: basic
provider_type: inline::basic
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
- provider_id: braintrust
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:=}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:=}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:=}
max_results: 3
- provider_id: rag-runtime
provider_type: inline::rag-runtime
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
storage:
backends:
kv_default:
type: kv_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/kvstore.db
sql_default:
type: sql_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/sql_store.db
stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
prompts:
namespace: prompts
backend: kv_default
registered_resources:
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: meta-reference-inference
model_type: llm
- metadata: {}
model_id: ${env.SAFETY_MODEL}
provider_id: meta-reference-safety
model_type: llm
- metadata:
embedding_dimension: 768
model_id: nomic-embed-text-v1.5
provider_id: sentence-transformers
model_type: embedding
shields:
- shield_id: ${env.SAFETY_MODEL}
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
server:
port: 8321
telemetry:
enabled: true


@ -1,142 +0,0 @@
version: 2
image_name: meta-reference-gpu
apis:
- agents
- datasetio
- eval
- inference
- safety
- scoring
- tool_runtime
- vector_io
providers:
inference:
- provider_id: meta-reference-inference
provider_type: inline::meta-reference
config:
model: ${env.INFERENCE_MODEL}
checkpoint_dir: ${env.INFERENCE_CHECKPOINT_DIR:=null}
quantization:
type: ${env.QUANTIZATION_TYPE:=bf16}
model_parallel_size: ${env.MODEL_PARALLEL_SIZE:=0}
max_batch_size: ${env.MAX_BATCH_SIZE:=1}
max_seq_len: ${env.MAX_SEQ_LEN:=4096}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
vector_io:
- provider_id: faiss
provider_type: inline::faiss
config:
persistence:
namespace: vector_io::faiss
backend: kv_default
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config:
excluded_categories: []
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence:
agent_state:
namespace: agents
backend: kv_default
responses:
table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
kvstore:
namespace: eval
backend: kv_default
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config:
kvstore:
namespace: datasetio::huggingface
backend: kv_default
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
namespace: datasetio::localfs
backend: kv_default
scoring:
- provider_id: basic
provider_type: inline::basic
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
- provider_id: braintrust
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:=}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:=}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:=}
max_results: 3
- provider_id: rag-runtime
provider_type: inline::rag-runtime
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
storage:
backends:
kv_default:
type: kv_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/kvstore.db
sql_default:
type: sql_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/meta-reference-gpu}/sql_store.db
stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
prompts:
namespace: prompts
backend: kv_default
registered_resources:
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: meta-reference-inference
model_type: llm
- metadata:
embedding_dimension: 768
model_id: nomic-embed-text-v1.5
provider_id: sentence-transformers
model_type: embedding
shields: []
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
server:
port: 8321
telemetry:
enabled: true


@ -1,29 +0,0 @@
version: 2
distribution_spec:
description: Use NVIDIA NIM for running LLM inference, evaluation and safety
providers:
inference:
- provider_type: remote::nvidia
vector_io:
- provider_type: inline::faiss
safety:
- provider_type: remote::nvidia
agents:
- provider_type: inline::meta-reference
eval:
- provider_type: remote::nvidia
post_training:
- provider_type: remote::nvidia
datasetio:
- provider_type: inline::localfs
- provider_type: remote::nvidia
scoring:
- provider_type: inline::basic
tool_runtime:
- provider_type: inline::rag-runtime
files:
- provider_type: inline::localfs
image_type: venv
additional_pip_packages:
- aiosqlite
- sqlalchemy[asyncio]


@ -1,140 +0,0 @@
version: 2
image_name: nvidia
apis:
- agents
- datasetio
- eval
- files
- inference
- post_training
- safety
- scoring
- tool_runtime
- vector_io
providers:
inference:
- provider_id: nvidia
provider_type: remote::nvidia
config:
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
api_key: ${env.NVIDIA_API_KEY:=}
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
- provider_id: nvidia
provider_type: remote::nvidia
config:
guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331}
config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check}
vector_io:
- provider_id: faiss
provider_type: inline::faiss
config:
persistence:
namespace: vector_io::faiss
backend: kv_default
safety:
- provider_id: nvidia
provider_type: remote::nvidia
config:
guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331}
config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check}
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence:
agent_state:
namespace: agents
backend: kv_default
responses:
table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
eval:
- provider_id: nvidia
provider_type: remote::nvidia
config:
evaluator_url: ${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331}
post_training:
- provider_id: nvidia
provider_type: remote::nvidia
config:
api_key: ${env.NVIDIA_API_KEY:=}
dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default}
project_id: ${env.NVIDIA_PROJECT_ID:=test-project}
customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test}
datasetio:
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
namespace: datasetio::localfs
backend: kv_default
- provider_id: nvidia
provider_type: remote::nvidia
config:
api_key: ${env.NVIDIA_API_KEY:=}
dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default}
project_id: ${env.NVIDIA_PROJECT_ID:=test-project}
datasets_url: ${env.NVIDIA_DATASETS_URL:=http://nemo.test}
scoring:
- provider_id: basic
provider_type: inline::basic
tool_runtime:
- provider_id: rag-runtime
provider_type: inline::rag-runtime
files:
- provider_id: meta-reference-files
provider_type: inline::localfs
config:
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/nvidia/files}
metadata_store:
table_name: files_metadata
backend: sql_default
storage:
backends:
kv_default:
type: kv_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/kvstore.db
sql_default:
type: sql_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/sql_store.db
stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
prompts:
namespace: prompts
backend: kv_default
registered_resources:
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: nvidia
model_type: llm
- metadata: {}
model_id: ${env.SAFETY_MODEL}
provider_id: nvidia
model_type: llm
shields:
- shield_id: ${env.SAFETY_MODEL}
provider_id: nvidia
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::rag
provider_id: rag-runtime
server:
port: 8321
telemetry:
enabled: true


@ -1,119 +0,0 @@
version: 2
image_name: nvidia
apis:
- agents
- datasetio
- eval
- files
- inference
- post_training
- safety
- scoring
- tool_runtime
- vector_io
providers:
inference:
- provider_id: nvidia
provider_type: remote::nvidia
config:
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
api_key: ${env.NVIDIA_API_KEY:=}
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
vector_io:
- provider_id: faiss
provider_type: inline::faiss
config:
persistence:
namespace: vector_io::faiss
backend: kv_default
safety:
- provider_id: nvidia
provider_type: remote::nvidia
config:
guardrails_service_url: ${env.GUARDRAILS_SERVICE_URL:=http://localhost:7331}
config_id: ${env.NVIDIA_GUARDRAILS_CONFIG_ID:=self-check}
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence:
agent_state:
namespace: agents
backend: kv_default
responses:
table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
eval:
- provider_id: nvidia
provider_type: remote::nvidia
config:
evaluator_url: ${env.NVIDIA_EVALUATOR_URL:=http://localhost:7331}
post_training:
- provider_id: nvidia
provider_type: remote::nvidia
config:
api_key: ${env.NVIDIA_API_KEY:=}
dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default}
project_id: ${env.NVIDIA_PROJECT_ID:=test-project}
customizer_url: ${env.NVIDIA_CUSTOMIZER_URL:=http://nemo.test}
datasetio:
- provider_id: nvidia
provider_type: remote::nvidia
config:
api_key: ${env.NVIDIA_API_KEY:=}
dataset_namespace: ${env.NVIDIA_DATASET_NAMESPACE:=default}
project_id: ${env.NVIDIA_PROJECT_ID:=test-project}
datasets_url: ${env.NVIDIA_DATASETS_URL:=http://nemo.test}
scoring:
- provider_id: basic
provider_type: inline::basic
tool_runtime:
- provider_id: rag-runtime
provider_type: inline::rag-runtime
files:
- provider_id: meta-reference-files
provider_type: inline::localfs
config:
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/nvidia/files}
metadata_store:
table_name: files_metadata
backend: sql_default
storage:
backends:
kv_default:
type: kv_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/kvstore.db
sql_default:
type: sql_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/nvidia}/sql_store.db
stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
prompts:
namespace: prompts
backend: kv_default
registered_resources:
models: []
shields: []
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::rag
provider_id: rag-runtime
server:
port: 8321
telemetry:
enabled: true


@ -1,36 +0,0 @@
version: 2
distribution_spec:
description: Distribution for running open benchmarks
providers:
inference:
- provider_type: remote::openai
- provider_type: remote::anthropic
- provider_type: remote::gemini
- provider_type: remote::groq
- provider_type: remote::together
vector_io:
- provider_type: inline::sqlite-vec
- provider_type: remote::chromadb
- provider_type: remote::pgvector
safety:
- provider_type: inline::llama-guard
agents:
- provider_type: inline::meta-reference
eval:
- provider_type: inline::meta-reference
datasetio:
- provider_type: remote::huggingface
- provider_type: inline::localfs
scoring:
- provider_type: inline::basic
- provider_type: inline::llm-as-judge
- provider_type: inline::braintrust
tool_runtime:
- provider_type: remote::brave-search
- provider_type: remote::tavily-search
- provider_type: inline::rag-runtime
- provider_type: remote::model-context-protocol
image_type: venv
additional_pip_packages:
- aiosqlite
- sqlalchemy[asyncio]


@ -1,255 +0,0 @@
version: 2
image_name: open-benchmark
apis:
- agents
- datasetio
- eval
- inference
- safety
- scoring
- tool_runtime
- vector_io
providers:
inference:
- provider_id: openai
provider_type: remote::openai
config:
api_key: ${env.OPENAI_API_KEY:=}
base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
- provider_id: anthropic
provider_type: remote::anthropic
config:
api_key: ${env.ANTHROPIC_API_KEY:=}
- provider_id: gemini
provider_type: remote::gemini
config:
api_key: ${env.GEMINI_API_KEY:=}
- provider_id: groq
provider_type: remote::groq
config:
url: https://api.groq.com
api_key: ${env.GROQ_API_KEY:=}
- provider_id: together
provider_type: remote::together
config:
url: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY:=}
vector_io:
- provider_id: sqlite-vec
provider_type: inline::sqlite-vec
config:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sqlite_vec.db
persistence:
namespace: vector_io::sqlite_vec
backend: kv_default
- provider_id: ${env.ENABLE_CHROMADB:+chromadb}
provider_type: remote::chromadb
config:
url: ${env.CHROMADB_URL:=}
persistence:
namespace: vector_io::chroma_remote
backend: kv_default
- provider_id: ${env.ENABLE_PGVECTOR:+pgvector}
provider_type: remote::pgvector
config:
host: ${env.PGVECTOR_HOST:=localhost}
port: ${env.PGVECTOR_PORT:=5432}
db: ${env.PGVECTOR_DB:=}
user: ${env.PGVECTOR_USER:=}
password: ${env.PGVECTOR_PASSWORD:=}
persistence:
namespace: vector_io::pgvector
backend: kv_default
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config:
excluded_categories: []
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence:
agent_state:
namespace: agents
backend: kv_default
responses:
table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
kvstore:
namespace: eval
backend: kv_default
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config:
kvstore:
namespace: datasetio::huggingface
backend: kv_default
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
namespace: datasetio::localfs
backend: kv_default
scoring:
- provider_id: basic
provider_type: inline::basic
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
- provider_id: braintrust
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:=}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:=}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:=}
max_results: 3
- provider_id: rag-runtime
provider_type: inline::rag-runtime
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
storage:
backends:
kv_default:
type: kv_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/kvstore.db
sql_default:
type: sql_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/open-benchmark}/sql_store.db
stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
prompts:
namespace: prompts
backend: kv_default
registered_resources:
models:
- metadata: {}
model_id: gpt-4o
provider_id: openai
provider_model_id: gpt-4o
model_type: llm
- metadata: {}
model_id: claude-3-5-sonnet-latest
provider_id: anthropic
provider_model_id: claude-3-5-sonnet-latest
model_type: llm
- metadata: {}
model_id: gemini/gemini-1.5-flash
provider_id: gemini
provider_model_id: gemini/gemini-1.5-flash
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.3-70B-Instruct
provider_id: groq
provider_model_id: groq/llama-3.3-70b-versatile
model_type: llm
- metadata: {}
model_id: meta-llama/Llama-3.1-405B-Instruct
provider_id: together
provider_model_id: meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo
model_type: llm
shields:
- shield_id: meta-llama/Llama-Guard-3-8B
vector_dbs: []
datasets:
- purpose: eval/messages-answer
source:
type: uri
uri: huggingface://datasets/llamastack/simpleqa?split=train
metadata: {}
dataset_id: simpleqa
- purpose: eval/messages-answer
source:
type: uri
uri: huggingface://datasets/llamastack/mmlu_cot?split=test&name=all
metadata: {}
dataset_id: mmlu_cot
- purpose: eval/messages-answer
source:
type: uri
uri: huggingface://datasets/llamastack/gpqa_0shot_cot?split=test&name=gpqa_main
metadata: {}
dataset_id: gpqa_cot
- purpose: eval/messages-answer
source:
type: uri
uri: huggingface://datasets/llamastack/math_500?split=test
metadata: {}
dataset_id: math_500
- purpose: eval/messages-answer
source:
type: uri
uri: huggingface://datasets/llamastack/IfEval?split=train
metadata: {}
dataset_id: ifeval
- purpose: eval/messages-answer
source:
type: uri
uri: huggingface://datasets/llamastack/docvqa?split=val
metadata: {}
dataset_id: docvqa
scoring_fns: []
benchmarks:
- dataset_id: simpleqa
scoring_functions:
- llm-as-judge::405b-simpleqa
metadata: {}
benchmark_id: meta-reference-simpleqa
- dataset_id: mmlu_cot
scoring_functions:
- basic::regex_parser_multiple_choice_answer
metadata: {}
benchmark_id: meta-reference-mmlu-cot
- dataset_id: gpqa_cot
scoring_functions:
- basic::regex_parser_multiple_choice_answer
metadata: {}
benchmark_id: meta-reference-gpqa-cot
- dataset_id: math_500
scoring_functions:
- basic::regex_parser_math_response
metadata: {}
benchmark_id: meta-reference-math-500
- dataset_id: ifeval
scoring_functions:
- basic::ifeval
metadata: {}
benchmark_id: meta-reference-ifeval
- dataset_id: docvqa
scoring_functions:
- basic::docvqa
metadata: {}
benchmark_id: meta-reference-docvqa
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
server:
port: 8321
telemetry:
enabled: true

View file

@ -1,23 +0,0 @@
version: 2
distribution_spec:
description: Quick start template for running Llama Stack with several popular providers
providers:
inference:
- provider_type: remote::vllm
- provider_type: inline::sentence-transformers
vector_io:
- provider_type: remote::chromadb
safety:
- provider_type: inline::llama-guard
agents:
- provider_type: inline::meta-reference
tool_runtime:
- provider_type: remote::brave-search
- provider_type: remote::tavily-search
- provider_type: inline::rag-runtime
- provider_type: remote::model-context-protocol
image_type: venv
additional_pip_packages:
- asyncpg
- psycopg2-binary
- sqlalchemy[asyncio]

View file

@ -1,118 +0,0 @@
version: 2
image_name: postgres-demo
apis:
- agents
- inference
- safety
- tool_runtime
- vector_io
providers:
inference:
- provider_id: vllm-inference
provider_type: remote::vllm
config:
url: ${env.VLLM_URL:=http://localhost:8000/v1}
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
vector_io:
- provider_id: ${env.ENABLE_CHROMADB:+chromadb}
provider_type: remote::chromadb
config:
url: ${env.CHROMADB_URL:=}
persistence:
namespace: vector_io::chroma_remote
backend: kv_default
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config:
excluded_categories: []
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence:
agent_state:
namespace: agents
backend: kv_default
responses:
table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:=}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:=}
max_results: 3
- provider_id: rag-runtime
provider_type: inline::rag-runtime
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
storage:
backends:
kv_default:
type: kv_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
table_name: ${env.POSTGRES_TABLE_NAME:=llamastack_kvstore}
sql_default:
type: sql_postgres
host: ${env.POSTGRES_HOST:=localhost}
port: ${env.POSTGRES_PORT:=5432}
db: ${env.POSTGRES_DB:=llamastack}
user: ${env.POSTGRES_USER:=llamastack}
password: ${env.POSTGRES_PASSWORD:=llamastack}
stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
prompts:
namespace: prompts
backend: kv_default
registered_resources:
models:
- metadata: {}
model_id: ${env.INFERENCE_MODEL}
provider_id: vllm-inference
model_type: llm
- metadata:
embedding_dimension: 768
model_id: nomic-embed-text-v1.5
provider_id: sentence-transformers
model_type: embedding
shields:
- shield_id: meta-llama/Llama-Guard-3-8B
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
server:
port: 8321
telemetry:
enabled: true

View file

@ -1,60 +0,0 @@
version: 2
distribution_spec:
description: Quick start template for running Llama Stack with several popular providers.
This distribution is intended for GPU-enabled environments.
providers:
inference:
- provider_type: remote::cerebras
- provider_type: remote::ollama
- provider_type: remote::vllm
- provider_type: remote::tgi
- provider_type: remote::fireworks
- provider_type: remote::together
- provider_type: remote::bedrock
- provider_type: remote::nvidia
- provider_type: remote::openai
- provider_type: remote::anthropic
- provider_type: remote::gemini
- provider_type: remote::vertexai
- provider_type: remote::groq
- provider_type: remote::sambanova
- provider_type: remote::azure
- provider_type: inline::sentence-transformers
vector_io:
- provider_type: inline::faiss
- provider_type: inline::sqlite-vec
- provider_type: inline::milvus
- provider_type: remote::chromadb
- provider_type: remote::pgvector
- provider_type: remote::qdrant
- provider_type: remote::weaviate
files:
- provider_type: inline::localfs
safety:
- provider_type: inline::llama-guard
- provider_type: inline::code-scanner
agents:
- provider_type: inline::meta-reference
post_training:
- provider_type: inline::huggingface-gpu
eval:
- provider_type: inline::meta-reference
datasetio:
- provider_type: remote::huggingface
- provider_type: inline::localfs
scoring:
- provider_type: inline::basic
- provider_type: inline::llm-as-judge
- provider_type: inline::braintrust
tool_runtime:
- provider_type: remote::brave-search
- provider_type: remote::tavily-search
- provider_type: inline::rag-runtime
- provider_type: remote::model-context-protocol
batches:
- provider_type: inline::reference
image_type: venv
additional_pip_packages:
- aiosqlite
- asyncpg
- sqlalchemy[asyncio]

View file

@ -1,284 +0,0 @@
version: 2
image_name: starter-gpu
apis:
- agents
- batches
- datasetio
- eval
- files
- inference
- post_training
- safety
- scoring
- tool_runtime
- vector_io
providers:
inference:
- provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
provider_type: remote::cerebras
config:
base_url: https://api.cerebras.ai
api_key: ${env.CEREBRAS_API_KEY:=}
- provider_id: ${env.OLLAMA_URL:+ollama}
provider_type: remote::ollama
config:
url: ${env.OLLAMA_URL:=http://localhost:11434}
- provider_id: ${env.VLLM_URL:+vllm}
provider_type: remote::vllm
config:
url: ${env.VLLM_URL:=}
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
- provider_id: ${env.TGI_URL:+tgi}
provider_type: remote::tgi
config:
url: ${env.TGI_URL:=}
- provider_id: fireworks
provider_type: remote::fireworks
config:
url: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY:=}
- provider_id: together
provider_type: remote::together
config:
url: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY:=}
- provider_id: bedrock
provider_type: remote::bedrock
- provider_id: ${env.NVIDIA_API_KEY:+nvidia}
provider_type: remote::nvidia
config:
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
api_key: ${env.NVIDIA_API_KEY:=}
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
- provider_id: openai
provider_type: remote::openai
config:
api_key: ${env.OPENAI_API_KEY:=}
base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
- provider_id: anthropic
provider_type: remote::anthropic
config:
api_key: ${env.ANTHROPIC_API_KEY:=}
- provider_id: gemini
provider_type: remote::gemini
config:
api_key: ${env.GEMINI_API_KEY:=}
- provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
provider_type: remote::vertexai
config:
project: ${env.VERTEX_AI_PROJECT:=}
location: ${env.VERTEX_AI_LOCATION:=us-central1}
- provider_id: groq
provider_type: remote::groq
config:
url: https://api.groq.com
api_key: ${env.GROQ_API_KEY:=}
- provider_id: sambanova
provider_type: remote::sambanova
config:
url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY:=}
- provider_id: ${env.AZURE_API_KEY:+azure}
provider_type: remote::azure
config:
api_key: ${env.AZURE_API_KEY:=}
api_base: ${env.AZURE_API_BASE:=}
api_version: ${env.AZURE_API_VERSION:=}
api_type: ${env.AZURE_API_TYPE:=}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
vector_io:
- provider_id: faiss
provider_type: inline::faiss
config:
persistence:
namespace: vector_io::faiss
backend: kv_default
- provider_id: sqlite-vec
provider_type: inline::sqlite-vec
config:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sqlite_vec.db
persistence:
namespace: vector_io::sqlite_vec
backend: kv_default
- provider_id: ${env.MILVUS_URL:+milvus}
provider_type: inline::milvus
config:
db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter-gpu}/milvus.db
persistence:
namespace: vector_io::milvus
backend: kv_default
- provider_id: ${env.CHROMADB_URL:+chromadb}
provider_type: remote::chromadb
config:
url: ${env.CHROMADB_URL:=}
persistence:
namespace: vector_io::chroma_remote
backend: kv_default
- provider_id: ${env.PGVECTOR_DB:+pgvector}
provider_type: remote::pgvector
config:
host: ${env.PGVECTOR_HOST:=localhost}
port: ${env.PGVECTOR_PORT:=5432}
db: ${env.PGVECTOR_DB:=}
user: ${env.PGVECTOR_USER:=}
password: ${env.PGVECTOR_PASSWORD:=}
persistence:
namespace: vector_io::pgvector
backend: kv_default
- provider_id: ${env.QDRANT_URL:+qdrant}
provider_type: remote::qdrant
config:
api_key: ${env.QDRANT_API_KEY:=}
persistence:
namespace: vector_io::qdrant_remote
backend: kv_default
- provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
provider_type: remote::weaviate
config:
weaviate_api_key: null
weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
persistence:
namespace: vector_io::weaviate
backend: kv_default
files:
- provider_id: meta-reference-files
provider_type: inline::localfs
config:
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter-gpu/files}
metadata_store:
table_name: files_metadata
backend: sql_default
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config:
excluded_categories: []
- provider_id: code-scanner
provider_type: inline::code-scanner
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence:
agent_state:
namespace: agents
backend: kv_default
responses:
table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
post_training:
- provider_id: huggingface-gpu
provider_type: inline::huggingface-gpu
config:
checkpoint_format: huggingface
distributed_backend: null
device: cpu
dpo_output_dir: ~/.llama/distributions/starter-gpu/dpo_output
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
kvstore:
namespace: eval
backend: kv_default
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config:
kvstore:
namespace: datasetio::huggingface
backend: kv_default
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
namespace: datasetio::localfs
backend: kv_default
scoring:
- provider_id: basic
provider_type: inline::basic
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
- provider_id: braintrust
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:=}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:=}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:=}
max_results: 3
- provider_id: rag-runtime
provider_type: inline::rag-runtime
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
batches:
- provider_id: reference
provider_type: inline::reference
config:
kvstore:
namespace: batches
backend: kv_default
storage:
backends:
kv_default:
type: kv_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/kvstore.db
sql_default:
type: sql_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter-gpu}/sql_store.db
stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
prompts:
namespace: prompts
backend: kv_default
registered_resources:
models: []
shields:
- shield_id: llama-guard
provider_id: ${env.SAFETY_MODEL:+llama-guard}
provider_shield_id: ${env.SAFETY_MODEL:=}
- shield_id: code-scanner
provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
server:
port: 8321
telemetry:
enabled: true
vector_stores:
default_provider_id: faiss
default_embedding_model:
provider_id: sentence-transformers
model_id: nomic-ai/nomic-embed-text-v1.5
safety:
default_shield_id: llama-guard

View file

@ -1,60 +0,0 @@
version: 2
distribution_spec:
description: Quick start template for running Llama Stack with several popular providers.
This distribution is intended for CPU-only environments.
providers:
inference:
- provider_type: remote::cerebras
- provider_type: remote::ollama
- provider_type: remote::vllm
- provider_type: remote::tgi
- provider_type: remote::fireworks
- provider_type: remote::together
- provider_type: remote::bedrock
- provider_type: remote::nvidia
- provider_type: remote::openai
- provider_type: remote::anthropic
- provider_type: remote::gemini
- provider_type: remote::vertexai
- provider_type: remote::groq
- provider_type: remote::sambanova
- provider_type: remote::azure
- provider_type: inline::sentence-transformers
vector_io:
- provider_type: inline::faiss
- provider_type: inline::sqlite-vec
- provider_type: inline::milvus
- provider_type: remote::chromadb
- provider_type: remote::pgvector
- provider_type: remote::qdrant
- provider_type: remote::weaviate
files:
- provider_type: inline::localfs
safety:
- provider_type: inline::llama-guard
- provider_type: inline::code-scanner
agents:
- provider_type: inline::meta-reference
post_training:
- provider_type: inline::torchtune-cpu
eval:
- provider_type: inline::meta-reference
datasetio:
- provider_type: remote::huggingface
- provider_type: inline::localfs
scoring:
- provider_type: inline::basic
- provider_type: inline::llm-as-judge
- provider_type: inline::braintrust
tool_runtime:
- provider_type: remote::brave-search
- provider_type: remote::tavily-search
- provider_type: inline::rag-runtime
- provider_type: remote::model-context-protocol
batches:
- provider_type: inline::reference
image_type: venv
additional_pip_packages:
- aiosqlite
- asyncpg
- sqlalchemy[asyncio]

View file

@ -1,281 +0,0 @@
version: 2
image_name: starter
apis:
- agents
- batches
- datasetio
- eval
- files
- inference
- post_training
- safety
- scoring
- tool_runtime
- vector_io
providers:
inference:
- provider_id: ${env.CEREBRAS_API_KEY:+cerebras}
provider_type: remote::cerebras
config:
base_url: https://api.cerebras.ai
api_key: ${env.CEREBRAS_API_KEY:=}
- provider_id: ${env.OLLAMA_URL:+ollama}
provider_type: remote::ollama
config:
url: ${env.OLLAMA_URL:=http://localhost:11434}
- provider_id: ${env.VLLM_URL:+vllm}
provider_type: remote::vllm
config:
url: ${env.VLLM_URL:=}
max_tokens: ${env.VLLM_MAX_TOKENS:=4096}
api_token: ${env.VLLM_API_TOKEN:=fake}
tls_verify: ${env.VLLM_TLS_VERIFY:=true}
- provider_id: ${env.TGI_URL:+tgi}
provider_type: remote::tgi
config:
url: ${env.TGI_URL:=}
- provider_id: fireworks
provider_type: remote::fireworks
config:
url: https://api.fireworks.ai/inference/v1
api_key: ${env.FIREWORKS_API_KEY:=}
- provider_id: together
provider_type: remote::together
config:
url: https://api.together.xyz/v1
api_key: ${env.TOGETHER_API_KEY:=}
- provider_id: bedrock
provider_type: remote::bedrock
- provider_id: ${env.NVIDIA_API_KEY:+nvidia}
provider_type: remote::nvidia
config:
url: ${env.NVIDIA_BASE_URL:=https://integrate.api.nvidia.com}
api_key: ${env.NVIDIA_API_KEY:=}
append_api_version: ${env.NVIDIA_APPEND_API_VERSION:=True}
- provider_id: openai
provider_type: remote::openai
config:
api_key: ${env.OPENAI_API_KEY:=}
base_url: ${env.OPENAI_BASE_URL:=https://api.openai.com/v1}
- provider_id: anthropic
provider_type: remote::anthropic
config:
api_key: ${env.ANTHROPIC_API_KEY:=}
- provider_id: gemini
provider_type: remote::gemini
config:
api_key: ${env.GEMINI_API_KEY:=}
- provider_id: ${env.VERTEX_AI_PROJECT:+vertexai}
provider_type: remote::vertexai
config:
project: ${env.VERTEX_AI_PROJECT:=}
location: ${env.VERTEX_AI_LOCATION:=us-central1}
- provider_id: groq
provider_type: remote::groq
config:
url: https://api.groq.com
api_key: ${env.GROQ_API_KEY:=}
- provider_id: sambanova
provider_type: remote::sambanova
config:
url: https://api.sambanova.ai/v1
api_key: ${env.SAMBANOVA_API_KEY:=}
- provider_id: ${env.AZURE_API_KEY:+azure}
provider_type: remote::azure
config:
api_key: ${env.AZURE_API_KEY:=}
api_base: ${env.AZURE_API_BASE:=}
api_version: ${env.AZURE_API_VERSION:=}
api_type: ${env.AZURE_API_TYPE:=}
- provider_id: sentence-transformers
provider_type: inline::sentence-transformers
vector_io:
- provider_id: faiss
provider_type: inline::faiss
config:
persistence:
namespace: vector_io::faiss
backend: kv_default
- provider_id: sqlite-vec
provider_type: inline::sqlite-vec
config:
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sqlite_vec.db
persistence:
namespace: vector_io::sqlite_vec
backend: kv_default
- provider_id: ${env.MILVUS_URL:+milvus}
provider_type: inline::milvus
config:
db_path: ${env.MILVUS_DB_PATH:=~/.llama/distributions/starter}/milvus.db
persistence:
namespace: vector_io::milvus
backend: kv_default
- provider_id: ${env.CHROMADB_URL:+chromadb}
provider_type: remote::chromadb
config:
url: ${env.CHROMADB_URL:=}
persistence:
namespace: vector_io::chroma_remote
backend: kv_default
- provider_id: ${env.PGVECTOR_DB:+pgvector}
provider_type: remote::pgvector
config:
host: ${env.PGVECTOR_HOST:=localhost}
port: ${env.PGVECTOR_PORT:=5432}
db: ${env.PGVECTOR_DB:=}
user: ${env.PGVECTOR_USER:=}
password: ${env.PGVECTOR_PASSWORD:=}
persistence:
namespace: vector_io::pgvector
backend: kv_default
- provider_id: ${env.QDRANT_URL:+qdrant}
provider_type: remote::qdrant
config:
api_key: ${env.QDRANT_API_KEY:=}
persistence:
namespace: vector_io::qdrant_remote
backend: kv_default
- provider_id: ${env.WEAVIATE_CLUSTER_URL:+weaviate}
provider_type: remote::weaviate
config:
weaviate_api_key: null
weaviate_cluster_url: ${env.WEAVIATE_CLUSTER_URL:=localhost:8080}
persistence:
namespace: vector_io::weaviate
backend: kv_default
files:
- provider_id: meta-reference-files
provider_type: inline::localfs
config:
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
metadata_store:
table_name: files_metadata
backend: sql_default
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config:
excluded_categories: []
- provider_id: code-scanner
provider_type: inline::code-scanner
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence:
agent_state:
namespace: agents
backend: kv_default
responses:
table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
post_training:
- provider_id: torchtune-cpu
provider_type: inline::torchtune-cpu
config:
checkpoint_format: meta
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
kvstore:
namespace: eval
backend: kv_default
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config:
kvstore:
namespace: datasetio::huggingface
backend: kv_default
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
namespace: datasetio::localfs
backend: kv_default
scoring:
- provider_id: basic
provider_type: inline::basic
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
- provider_id: braintrust
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:=}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:=}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:=}
max_results: 3
- provider_id: rag-runtime
provider_type: inline::rag-runtime
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
batches:
- provider_id: reference
provider_type: inline::reference
config:
kvstore:
namespace: batches
backend: kv_default
storage:
backends:
kv_default:
type: kv_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/kvstore.db
sql_default:
type: sql_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/sql_store.db
stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
prompts:
namespace: prompts
backend: kv_default
registered_resources:
models: []
shields:
- shield_id: llama-guard
provider_id: ${env.SAFETY_MODEL:+llama-guard}
provider_shield_id: ${env.SAFETY_MODEL:=}
- shield_id: code-scanner
provider_id: ${env.CODE_SCANNER_MODEL:+code-scanner}
provider_shield_id: ${env.CODE_SCANNER_MODEL:=}
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
server:
port: 8321
telemetry:
enabled: true
vector_stores:
default_provider_id: faiss
default_embedding_model:
provider_id: sentence-transformers
model_id: nomic-ai/nomic-embed-text-v1.5
safety:
default_shield_id: llama-guard

View file

@ -1,33 +0,0 @@
version: 2
distribution_spec:
description: Use watsonx for running LLM inference
providers:
inference:
- provider_type: remote::watsonx
- provider_type: inline::sentence-transformers
vector_io:
- provider_type: inline::faiss
safety:
- provider_type: inline::llama-guard
agents:
- provider_type: inline::meta-reference
eval:
- provider_type: inline::meta-reference
datasetio:
- provider_type: remote::huggingface
- provider_type: inline::localfs
scoring:
- provider_type: inline::basic
- provider_type: inline::llm-as-judge
- provider_type: inline::braintrust
tool_runtime:
- provider_type: remote::brave-search
- provider_type: remote::tavily-search
- provider_type: inline::rag-runtime
- provider_type: remote::model-context-protocol
files:
- provider_type: inline::localfs
image_type: venv
additional_pip_packages:
- aiosqlite
- sqlalchemy[asyncio]

View file

@ -1,136 +0,0 @@
version: 2
image_name: watsonx
apis:
- agents
- datasetio
- eval
- files
- inference
- safety
- scoring
- tool_runtime
- vector_io
providers:
inference:
- provider_id: watsonx
provider_type: remote::watsonx
config:
url: ${env.WATSONX_BASE_URL:=https://us-south.ml.cloud.ibm.com}
api_key: ${env.WATSONX_API_KEY:=}
project_id: ${env.WATSONX_PROJECT_ID:=}
vector_io:
- provider_id: faiss
provider_type: inline::faiss
config:
persistence:
namespace: vector_io::faiss
backend: kv_default
safety:
- provider_id: llama-guard
provider_type: inline::llama-guard
config:
excluded_categories: []
agents:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
persistence:
agent_state:
namespace: agents
backend: kv_default
responses:
table_name: responses
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
eval:
- provider_id: meta-reference
provider_type: inline::meta-reference
config:
kvstore:
namespace: eval
backend: kv_default
datasetio:
- provider_id: huggingface
provider_type: remote::huggingface
config:
kvstore:
namespace: datasetio::huggingface
backend: kv_default
- provider_id: localfs
provider_type: inline::localfs
config:
kvstore:
namespace: datasetio::localfs
backend: kv_default
scoring:
- provider_id: basic
provider_type: inline::basic
- provider_id: llm-as-judge
provider_type: inline::llm-as-judge
- provider_id: braintrust
provider_type: inline::braintrust
config:
openai_api_key: ${env.OPENAI_API_KEY:=}
tool_runtime:
- provider_id: brave-search
provider_type: remote::brave-search
config:
api_key: ${env.BRAVE_SEARCH_API_KEY:=}
max_results: 3
- provider_id: tavily-search
provider_type: remote::tavily-search
config:
api_key: ${env.TAVILY_SEARCH_API_KEY:=}
max_results: 3
- provider_id: rag-runtime
provider_type: inline::rag-runtime
- provider_id: model-context-protocol
provider_type: remote::model-context-protocol
files:
- provider_id: meta-reference-files
provider_type: inline::localfs
config:
storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/watsonx/files}
metadata_store:
table_name: files_metadata
backend: sql_default
storage:
backends:
kv_default:
type: kv_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/kvstore.db
sql_default:
type: sql_sqlite
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/watsonx}/sql_store.db
stores:
metadata:
namespace: registry
backend: kv_default
inference:
table_name: inference_store
backend: sql_default
max_write_queue_size: 10000
num_writers: 4
conversations:
table_name: openai_conversations
backend: sql_default
prompts:
namespace: prompts
backend: kv_default
registered_resources:
models: []
shields: []
vector_dbs: []
datasets: []
scoring_fns: []
benchmarks: []
tool_groups:
- toolgroup_id: builtin::websearch
provider_id: tavily-search
- toolgroup_id: builtin::rag
provider_id: rag-runtime
server:
port: 8321
telemetry:
enabled: true

View file

@ -7,7 +7,7 @@ required-version = ">=0.7.0"
[project]
name = "llama_stack"
-version = "0.3.0"
+version = "0.4.0.dev0"
authors = [{ name = "Meta Llama", email = "llama-oss@meta.com" }]
description = "Llama Stack"
readme = "README.md"
@ -71,11 +71,40 @@ dev = [
    "nbval", # For notebook testing
    "black",
    "ruff",
-   "types-requests",
-   "types-setuptools",
+   "mypy",
    "pre-commit",
    "ruamel.yaml", # needed for openapi generator
]
+# Type checking dependencies - includes type stubs and optional runtime dependencies
+# needed for complete mypy coverage across all optional features
+type_checking = [
+   "types-requests",
+   "types-setuptools",
+   "types-jsonschema",
+   "pandas-stubs",
+   "types-psutil",
+   "types-tqdm",
+   "boto3-stubs[s3]",
+   "streamlit",
+   "streamlit-option-menu",
+   "pandas",
+   "anthropic",
+   "databricks-sdk",
+   "fairscale",
+   "torchtune",
+   "trl",
+   "peft",
+   "datasets",
+   "together",
+   "nest-asyncio",
+   "pymongo",
+   "torchvision",
+   "sqlite-vec",
+   "faiss-cpu",
+   "lm-format-enforcer",
+   "mcp",
+   "ollama",
+]
# These are the dependencies required for running unit tests.
unit = [
    "anthropic",
@ -255,7 +284,6 @@ exclude = [
    "^src/llama_stack/models/llama/llama3/interface\\.py$",
    "^src/llama_stack/models/llama/llama3/tokenizer\\.py$",
    "^src/llama_stack/models/llama/llama3/tool_utils\\.py$",
-   "^src/llama_stack/providers/inline/agents/meta_reference/",
    "^src/llama_stack/providers/inline/datasetio/localfs/",
    "^src/llama_stack/providers/inline/eval/meta_reference/eval\\.py$",
    "^src/llama_stack/providers/inline/inference/meta_reference/inference\\.py$",
@ -316,7 +344,17 @@ exclude = [
[[tool.mypy.overrides]]
# packages that lack typing annotations, do not have stubs, or are unavailable.
-module = ["yaml", "fire"]
+module = [
+   "yaml",
+   "fire",
+   "torchtune.*",
+   "fairscale.*",
+   "torchvision.*",
+   "datasets",
+   "nest_asyncio",
+   "streamlit_option_menu",
+   "lmformatenforcer.*",
+]
ignore_missing_imports = true

[tool.pydantic-mypy]

View file

@ -55,7 +55,7 @@ def process_distro(distro_dir: Path, progress, change_tracker: ChangedPathTracker):
    if template_func := getattr(module, "get_distribution_template", None):
        distro = template_func()
-       yaml_output_dir = REPO_ROOT / "llama_stack" / "distributions" / distro.name
+       yaml_output_dir = REPO_ROOT / "src" / "llama_stack" / "distributions" / distro.name
        doc_output_dir = REPO_ROOT / "docs/docs/distributions" / f"{distro.distro_type}_distro"
        change_tracker.add_paths(yaml_output_dir, doc_output_dir)
        distro.save_distribution(

View file

@ -215,6 +215,16 @@ build_image() {
--build-arg "LLAMA_STACK_DIR=/workspace" --build-arg "LLAMA_STACK_DIR=/workspace"
) )
# Pass UV index configuration for release branches
if [[ -n "${UV_EXTRA_INDEX_URL:-}" ]]; then
echo "Adding UV_EXTRA_INDEX_URL to docker build: $UV_EXTRA_INDEX_URL"
build_cmd+=(--build-arg "UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL")
fi
if [[ -n "${UV_INDEX_STRATEGY:-}" ]]; then
echo "Adding UV_INDEX_STRATEGY to docker build: $UV_INDEX_STRATEGY"
build_cmd+=(--build-arg "UV_INDEX_STRATEGY=$UV_INDEX_STRATEGY")
fi
if ! "${build_cmd[@]}"; then if ! "${build_cmd[@]}"; then
echo "❌ Failed to build Docker image" echo "❌ Failed to build Docker image"
exit 1 exit 1

View file

@ -23,7 +23,7 @@ COLLECT_ONLY=false
# Function to display usage
usage() {
-  cat << EOF
+  cat <<EOF
Usage: $0 [OPTIONS]

Options:
@ -102,7 +102,6 @@ while [[ $# -gt 0 ]]; do
  esac
done

-
# Validate required parameters
if [[ -z "$STACK_CONFIG" && "$COLLECT_ONLY" == false ]]; then
  echo "Error: --stack-config is required"
@ -177,21 +176,45 @@ cd $ROOT_DIR
# check if "llama" and "pytest" are available. this script does not use `uv run` given # check if "llama" and "pytest" are available. this script does not use `uv run` given
# it can be used in a pre-release environment where we have not been able to tell # it can be used in a pre-release environment where we have not been able to tell
# uv about pre-release dependencies properly (yet). # uv about pre-release dependencies properly (yet).
if [[ "$COLLECT_ONLY" == false ]] && ! command -v llama &> /dev/null; then if [[ "$COLLECT_ONLY" == false ]] && ! command -v llama &>/dev/null; then
echo "llama could not be found, ensure llama-stack is installed" echo "llama could not be found, ensure llama-stack is installed"
exit 1 exit 1
fi fi
if ! command -v pytest &> /dev/null; then if ! command -v pytest &>/dev/null; then
echo "pytest could not be found, ensure pytest is installed" echo "pytest could not be found, ensure pytest is installed"
exit 1 exit 1
fi fi
# Helper function to find next available port
find_available_port() {
local start_port=$1
local port=$start_port
for ((i=0; i<100; i++)); do
if ! lsof -Pi :$port -sTCP:LISTEN -t >/dev/null 2>&1; then
echo $port
return 0
fi
((port++))
done
echo "Failed to find available port starting from $start_port" >&2
return 1
}
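The `find_available_port` helper above probes with `lsof`, which has to be installed on the runner. A rough Python equivalent that tries to bind a socket instead (illustrative only, not part of the change set):

```python
import socket

# Probe ports by binding rather than shelling out to lsof; a port that
# accepts a bind on localhost is treated as free.
def find_available_port(start_port: int, attempts: int = 100) -> int:
    for port in range(start_port, start_port + attempts):
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            try:
                s.bind(("127.0.0.1", port))
                return port
            except OSError:
                continue  # port in use, try the next one
    raise RuntimeError(f"no free port found starting from {start_port}")

print(find_available_port(8321))
```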
# Start Llama Stack Server if needed
if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then
+  # Find an available port for the server
+  LLAMA_STACK_PORT=$(find_available_port 8321)
+  if [[ $? -ne 0 ]]; then
+    echo "Error: $LLAMA_STACK_PORT"
+    exit 1
+  fi
+  export LLAMA_STACK_PORT
+  echo "Will use port: $LLAMA_STACK_PORT"
+
  stop_server() {
    echo "Stopping Llama Stack Server..."
-    pids=$(lsof -i :8321 | awk 'NR>1 {print $2}')
+    pids=$(lsof -i :$LLAMA_STACK_PORT | awk 'NR>1 {print $2}')
    if [[ -n "$pids" ]]; then
      echo "Killing Llama Stack Server processes: $pids"
      kill -9 $pids
@ -201,20 +224,25 @@ if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then
echo "Llama Stack Server stopped" echo "Llama Stack Server stopped"
} }
# check if server is already running
if curl -s http://localhost:8321/v1/health 2>/dev/null | grep -q "OK"; then
echo "Llama Stack Server is already running, skipping start"
else
echo "=== Starting Llama Stack Server ===" echo "=== Starting Llama Stack Server ==="
export LLAMA_STACK_LOG_WIDTH=120 export LLAMA_STACK_LOG_WIDTH=120
# Configure telemetry collector for server mode
# Use a fixed port for the OTEL collector so the server can connect to it
COLLECTOR_PORT=4317
export LLAMA_STACK_TEST_COLLECTOR_PORT="${COLLECTOR_PORT}"
export OTEL_EXPORTER_OTLP_ENDPOINT="http://127.0.0.1:${COLLECTOR_PORT}"
export OTEL_EXPORTER_OTLP_PROTOCOL="http/protobuf"
export OTEL_BSP_SCHEDULE_DELAY="200"
export OTEL_BSP_EXPORT_TIMEOUT="2000"
# remove "server:" from STACK_CONFIG # remove "server:" from STACK_CONFIG
stack_config=$(echo "$STACK_CONFIG" | sed 's/^server://') stack_config=$(echo "$STACK_CONFIG" | sed 's/^server://')
nohup llama stack run $stack_config > server.log 2>&1 & nohup llama stack run $stack_config >server.log 2>&1 &
echo "Waiting for Llama Stack Server to start..." echo "Waiting for Llama Stack Server to start on port $LLAMA_STACK_PORT..."
for i in {1..30}; do for i in {1..30}; do
if curl -s http://localhost:8321/v1/health 2>/dev/null | grep -q "OK"; then if curl -s http://localhost:$LLAMA_STACK_PORT/v1/health 2>/dev/null | grep -q "OK"; then
echo "✅ Llama Stack Server started successfully" echo "✅ Llama Stack Server started successfully"
break break
fi fi
@ -227,7 +255,6 @@ if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then
    sleep 1
  done
  echo ""
-  fi

  trap stop_server EXIT ERR INT TERM
fi
@ -239,7 +266,7 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
container_name="llama-stack-test-$DISTRO" container_name="llama-stack-test-$DISTRO"
if docker ps -a --format '{{.Names}}' | grep -q "^${container_name}$"; then if docker ps -a --format '{{.Names}}' | grep -q "^${container_name}$"; then
echo "Dumping container logs before stopping..." echo "Dumping container logs before stopping..."
docker logs "$container_name" > "docker-${DISTRO}-${INFERENCE_MODE}.log" 2>&1 || true docker logs "$container_name" >"docker-${DISTRO}-${INFERENCE_MODE}.log" 2>&1 || true
echo "Stopping and removing container: $container_name" echo "Stopping and removing container: $container_name"
docker stop "$container_name" 2>/dev/null || true docker stop "$container_name" 2>/dev/null || true
docker rm "$container_name" 2>/dev/null || true docker rm "$container_name" 2>/dev/null || true
@ -251,7 +278,14 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
  # Extract distribution name from docker:distro format
  DISTRO=$(echo "$STACK_CONFIG" | sed 's/^docker://')

-  export LLAMA_STACK_PORT=8321
+  # Find an available port for the docker container
+  LLAMA_STACK_PORT=$(find_available_port 8321)
+  if [[ $? -ne 0 ]]; then
+    echo "Error: $LLAMA_STACK_PORT"
+    exit 1
+  fi
+  export LLAMA_STACK_PORT
+  echo "Will use port: $LLAMA_STACK_PORT"

  echo "=== Building Docker Image for distribution: $DISTRO ==="
  containerfile="$ROOT_DIR/containers/Containerfile"
@ -271,6 +305,16 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
--build-arg "LLAMA_STACK_DIR=/workspace" --build-arg "LLAMA_STACK_DIR=/workspace"
) )
# Pass UV index configuration for release branches
if [[ -n "${UV_EXTRA_INDEX_URL:-}" ]]; then
echo "Adding UV_EXTRA_INDEX_URL to docker build: $UV_EXTRA_INDEX_URL"
build_cmd+=(--build-arg "UV_EXTRA_INDEX_URL=$UV_EXTRA_INDEX_URL")
fi
if [[ -n "${UV_INDEX_STRATEGY:-}" ]]; then
echo "Adding UV_INDEX_STRATEGY to docker build: $UV_INDEX_STRATEGY"
build_cmd+=(--build-arg "UV_INDEX_STRATEGY=$UV_INDEX_STRATEGY")
fi
if ! "${build_cmd[@]}"; then if ! "${build_cmd[@]}"; then
echo "❌ Failed to build Docker image" echo "❌ Failed to build Docker image"
exit 1 exit 1
@ -284,10 +328,15 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
docker stop "$container_name" 2>/dev/null || true docker stop "$container_name" 2>/dev/null || true
docker rm "$container_name" 2>/dev/null || true docker rm "$container_name" 2>/dev/null || true
# Configure telemetry collector port shared between host and container
COLLECTOR_PORT=4317
export LLAMA_STACK_TEST_COLLECTOR_PORT="${COLLECTOR_PORT}"
# Build environment variables for docker run # Build environment variables for docker run
DOCKER_ENV_VARS="" DOCKER_ENV_VARS=""
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_INFERENCE_MODE=$INFERENCE_MODE" DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_INFERENCE_MODE=$INFERENCE_MODE"
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_STACK_CONFIG_TYPE=server" DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_STACK_CONFIG_TYPE=server"
DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:${COLLECTOR_PORT}"
# Pass through API keys if they exist # Pass through API keys if they exist
[ -n "${TOGETHER_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e TOGETHER_API_KEY=$TOGETHER_API_KEY" [ -n "${TOGETHER_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e TOGETHER_API_KEY=$TOGETHER_API_KEY"
@ -308,8 +357,20 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
  fi
  echo "Using image: $IMAGE_NAME"

-  docker run -d --network host --name "$container_name" \
-    -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  # On macOS/Darwin, --network host doesn't work as expected due to Docker running in a VM
+  # Use regular port mapping instead
+  NETWORK_MODE=""
+  PORT_MAPPINGS=""
+  if [[ "$(uname)" != "Darwin" ]] && [[ "$(uname)" != *"MINGW"* ]]; then
+    NETWORK_MODE="--network host"
+  else
+    # On non-Linux (macOS, Windows), need explicit port mappings for both app and telemetry
+    PORT_MAPPINGS="-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT -p $COLLECTOR_PORT:$COLLECTOR_PORT"
+    echo "Using bridge networking with port mapping (non-Linux)"
+  fi
+
+  docker run -d $NETWORK_MODE --name "$container_name" \
+    $PORT_MAPPINGS \
    $DOCKER_ENV_VARS \
    "$IMAGE_NAME" \
    --port $LLAMA_STACK_PORT
@ -411,17 +472,13 @@ elif [ $exit_code -eq 5 ]; then
else
  echo "❌ Tests failed"
  echo ""
-  echo "=== Dumping last 100 lines of logs for debugging ==="
  # Output server or container logs based on stack config
  if [[ "$STACK_CONFIG" == *"server:"* && -f "server.log" ]]; then
-    echo "--- Last 100 lines of server.log ---"
-    tail -100 server.log
+    echo "--- Server side failures can be located inside server.log (available from artifacts on CI) ---"
  elif [[ "$STACK_CONFIG" == *"docker:"* ]]; then
    docker_log_file="docker-${DISTRO}-${INFERENCE_MODE}.log"
    if [[ -f "$docker_log_file" ]]; then
-      echo "--- Last 100 lines of $docker_log_file ---"
-      tail -100 "$docker_log_file"
+      echo "--- Server side failures can be located inside $docker_log_file (available from artifacts on CI) ---"
    fi
  fi

scripts/uv-run-with-index.sh (new executable file, 42 lines)
View file

@ -0,0 +1,42 @@
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.
set -euo pipefail
# Detect current branch and target branch
# In GitHub Actions, use GITHUB_REF/GITHUB_BASE_REF
if [[ -n "${GITHUB_REF:-}" ]]; then
BRANCH="${GITHUB_REF#refs/heads/}"
else
BRANCH=$(git rev-parse --abbrev-ref HEAD 2>/dev/null || echo "")
fi
# For PRs, check the target branch
if [[ -n "${GITHUB_BASE_REF:-}" ]]; then
TARGET_BRANCH="${GITHUB_BASE_REF}"
else
TARGET_BRANCH=$(git rev-parse --abbrev-ref HEAD@{upstream} 2>/dev/null | sed 's|origin/||' || echo "")
fi
# Check if on a release branch or targeting one, or LLAMA_STACK_RELEASE_MODE is set
IS_RELEASE=false
if [[ "$BRANCH" =~ ^release-[0-9]+\.[0-9]+\.x$ ]]; then
IS_RELEASE=true
elif [[ "$TARGET_BRANCH" =~ ^release-[0-9]+\.[0-9]+\.x$ ]]; then
IS_RELEASE=true
elif [[ "${LLAMA_STACK_RELEASE_MODE:-}" == "true" ]]; then
IS_RELEASE=true
fi
# On release branches, use test.pypi as extra index for RC versions
if [[ "$IS_RELEASE" == "true" ]]; then
export UV_EXTRA_INDEX_URL="https://test.pypi.org/simple/"
export UV_INDEX_STRATEGY="unsafe-best-match"
fi
# Run uv with all arguments passed through
exec uv "$@"

View file

@ -38,6 +38,7 @@ from .openai_responses import (
    OpenAIResponseInputTool,
    OpenAIResponseObject,
    OpenAIResponseObjectStream,
+   OpenAIResponsePrompt,
    OpenAIResponseText,
)
@ -490,13 +491,6 @@ class Agents(Protocol):
    APIs for creating and interacting with agentic systems."""

-   @webmethod(
-       route="/agents",
-       method="POST",
-       descriptive_name="create_agent",
-       deprecated=True,
-       level=LLAMA_STACK_API_V1,
-   )
    @webmethod(
        route="/agents",
        method="POST",
@ -514,13 +508,6 @@ class Agents(Protocol):
""" """
... ...
@webmethod(
route="/agents/{agent_id}/session/{session_id}/turn",
method="POST",
descriptive_name="create_agent_turn",
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod( @webmethod(
route="/agents/{agent_id}/session/{session_id}/turn", route="/agents/{agent_id}/session/{session_id}/turn",
method="POST", method="POST",
@ -551,13 +538,6 @@ class Agents(Protocol):
""" """
... ...
@webmethod(
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
method="POST",
descriptive_name="resume_agent_turn",
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod( @webmethod(
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume", route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume",
method="POST", method="POST",
@ -585,12 +565,6 @@ class Agents(Protocol):
""" """
... ...
@webmethod(
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}",
method="GET",
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod( @webmethod(
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}", route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}",
method="GET", method="GET",
@ -611,12 +585,6 @@ class Agents(Protocol):
""" """
... ...
@webmethod(
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
method="GET",
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod( @webmethod(
route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}", route="/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}",
method="GET", method="GET",
@ -639,13 +607,6 @@ class Agents(Protocol):
""" """
... ...
@webmethod(
route="/agents/{agent_id}/session",
method="POST",
descriptive_name="create_agent_session",
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod( @webmethod(
route="/agents/{agent_id}/session", route="/agents/{agent_id}/session",
method="POST", method="POST",
@ -665,12 +626,6 @@ class Agents(Protocol):
""" """
... ...
@webmethod(
route="/agents/{agent_id}/session/{session_id}",
method="GET",
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod( @webmethod(
route="/agents/{agent_id}/session/{session_id}", route="/agents/{agent_id}/session/{session_id}",
method="GET", method="GET",
@ -691,12 +646,6 @@ class Agents(Protocol):
""" """
... ...
@webmethod(
route="/agents/{agent_id}/session/{session_id}",
method="DELETE",
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod( @webmethod(
route="/agents/{agent_id}/session/{session_id}", route="/agents/{agent_id}/session/{session_id}",
method="DELETE", method="DELETE",
@ -714,12 +663,6 @@ class Agents(Protocol):
""" """
... ...
@webmethod(
route="/agents/{agent_id}",
method="DELETE",
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod(route="/agents/{agent_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA) @webmethod(route="/agents/{agent_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
async def delete_agent( async def delete_agent(
self, self,
@ -731,7 +674,6 @@ class Agents(Protocol):
""" """
... ...
@webmethod(route="/agents", method="GET", deprecated=True, level=LLAMA_STACK_API_V1)
@webmethod(route="/agents", method="GET", level=LLAMA_STACK_API_V1ALPHA) @webmethod(route="/agents", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def list_agents(self, start_index: int | None = None, limit: int | None = None) -> PaginatedResponse: async def list_agents(self, start_index: int | None = None, limit: int | None = None) -> PaginatedResponse:
"""List all agents. """List all agents.
@ -742,12 +684,6 @@ class Agents(Protocol):
""" """
... ...
@webmethod(
route="/agents/{agent_id}",
method="GET",
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod(route="/agents/{agent_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA) @webmethod(route="/agents/{agent_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def get_agent(self, agent_id: str) -> Agent: async def get_agent(self, agent_id: str) -> Agent:
"""Describe an agent by its ID. """Describe an agent by its ID.
@ -757,12 +693,6 @@ class Agents(Protocol):
""" """
... ...
@webmethod(
route="/agents/{agent_id}/sessions",
method="GET",
deprecated=True,
level=LLAMA_STACK_API_V1,
)
@webmethod(route="/agents/{agent_id}/sessions", method="GET", level=LLAMA_STACK_API_V1ALPHA) @webmethod(route="/agents/{agent_id}/sessions", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def list_agent_sessions( async def list_agent_sessions(
self, self,
@ -786,12 +716,6 @@ class Agents(Protocol):
    #
    # Both of these APIs are inherently stateful.

-   @webmethod(
-       route="/openai/v1/responses/{response_id}",
-       method="GET",
-       level=LLAMA_STACK_API_V1,
-       deprecated=True,
-   )
    @webmethod(route="/responses/{response_id}", method="GET", level=LLAMA_STACK_API_V1)
    async def get_openai_response(
        self,
@ -804,12 +728,12 @@ class Agents(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/responses", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/responses", method="POST", level=LLAMA_STACK_API_V1) @webmethod(route="/responses", method="POST", level=LLAMA_STACK_API_V1)
async def create_openai_response( async def create_openai_response(
self, self,
input: str | list[OpenAIResponseInput], input: str | list[OpenAIResponseInput],
model: str, model: str,
prompt: OpenAIResponsePrompt | None = None,
instructions: str | None = None, instructions: str | None = None,
previous_response_id: str | None = None, previous_response_id: str | None = None,
conversation: str | None = None, conversation: str | None = None,
@ -831,6 +755,7 @@ class Agents(Protocol):
        :param input: Input message(s) to create the response.
        :param model: The underlying LLM used for completions.
+       :param prompt: (Optional) Prompt object with ID, version, and variables.
        :param previous_response_id: (Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses.
        :param conversation: (Optional) The ID of a conversation to add the response to. Must begin with 'conv_'. Input and output messages will be automatically added to the conversation.
        :param include: (Optional) Additional fields to include in the response.
@ -839,7 +764,6 @@ class Agents(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/responses", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/responses", method="GET", level=LLAMA_STACK_API_V1) @webmethod(route="/responses", method="GET", level=LLAMA_STACK_API_V1)
async def list_openai_responses( async def list_openai_responses(
self, self,
@ -858,9 +782,6 @@ class Agents(Protocol):
""" """
... ...
@webmethod(
route="/openai/v1/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
)
@webmethod(route="/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1) @webmethod(route="/responses/{response_id}/input_items", method="GET", level=LLAMA_STACK_API_V1)
async def list_openai_response_input_items( async def list_openai_response_input_items(
self, self,
@ -883,7 +804,6 @@ class Agents(Protocol):
""" """
... ...
@webmethod(route="/openai/v1/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1) @webmethod(route="/responses/{response_id}", method="DELETE", level=LLAMA_STACK_API_V1)
async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject: async def delete_openai_response(self, response_id: str) -> OpenAIDeleteResponseObject:
"""Delete a response. """Delete a response.

View file

@ -4,9 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

+from collections.abc import Sequence
from typing import Annotated, Any, Literal

-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, model_validator
from typing_extensions import TypedDict

from llama_stack.apis.vector_io import SearchRankingOptions as FileSearchRankingOptions
@ -46,23 +47,66 @@ class OpenAIResponseInputMessageContentImage(BaseModel):
    :param detail: Level of detail for image processing, can be "low", "high", or "auto"
    :param type: Content type identifier, always "input_image"
+   :param file_id: (Optional) The ID of the file to be sent to the model.
    :param image_url: (Optional) URL of the image content
    """

    detail: Literal["low"] | Literal["high"] | Literal["auto"] = "auto"
    type: Literal["input_image"] = "input_image"
-   # TODO: handle file_id
+   file_id: str | None = None
    image_url: str | None = None


-# TODO: handle file content types
+@json_schema_type
class OpenAIResponseInputMessageContentFile(BaseModel):
"""File content for input messages in OpenAI response format.
:param type: The type of the input item. Always `input_file`.
:param file_data: The data of the file to be sent to the model.
:param file_id: (Optional) The ID of the file to be sent to the model.
:param file_url: The URL of the file to be sent to the model.
:param filename: The name of the file to be sent to the model.
"""
type: Literal["input_file"] = "input_file"
file_data: str | None = None
file_id: str | None = None
file_url: str | None = None
filename: str | None = None
@model_validator(mode="after")
def validate_file_source(self) -> "OpenAIResponseInputMessageContentFile":
if not any([self.file_data, self.file_id, self.file_url, self.filename]):
raise ValueError(
"At least one of 'file_data', 'file_id', 'file_url', or 'filename' must be provided for file content"
)
return self
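A usage sketch of the validator above, assuming pydantic v2 and this module's import path:

```python
from pydantic import ValidationError

# Hypothetical import path; adjust to wherever this module lives in your tree.
from llama_stack.apis.agents.openai_responses import OpenAIResponseInputMessageContentFile

ok = OpenAIResponseInputMessageContentFile(file_id="file-abc123")  # one source given

try:
    OpenAIResponseInputMessageContentFile()  # no file source at all
except ValidationError as exc:
    print(exc)  # surfaces the "At least one of ..." message from the validator
```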
OpenAIResponseInputMessageContent = Annotated[ OpenAIResponseInputMessageContent = Annotated[
OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage, OpenAIResponseInputMessageContentText
| OpenAIResponseInputMessageContentImage
| OpenAIResponseInputMessageContentFile,
Field(discriminator="type"), Field(discriminator="type"),
] ]
register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent") register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent")
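
For orientation, the new `input_file` variant and its validator behave like this (a minimal sketch against the models above, not part of the diff; the file ID value is illustrative):

    from pydantic import ValidationError

    # Any one of file_data / file_id / file_url / filename satisfies the validator.
    ok = OpenAIResponseInputMessageContentFile(file_id="file-abc123")

    # With no file source at all, the model_validator raises.
    try:
        OpenAIResponseInputMessageContentFile()
    except ValidationError:
        pass  # "At least one of 'file_data', 'file_id', 'file_url', or 'filename' must be provided ..."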
+@json_schema_type
+class OpenAIResponsePrompt(BaseModel):
+    """OpenAI compatible Prompt object that is used in OpenAI responses.
+
+    :param id: Unique identifier of the prompt template
+    :param variables: Dictionary of variable names to OpenAIResponseInputMessageContent structure for template substitution. The substitution values can either be strings, or other Response input types like images or files.
+    :param version: Version number of the prompt to use (defaults to latest if not specified)
+    """
+
+    id: str
+    variables: dict[str, OpenAIResponseInputMessageContent] | None = None
+    version: str | None = None
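
A prompt reference with one templated variable might look like the sketch below; the ID and variable name are illustrative, and `OpenAIResponseInputMessageContentText` is assumed to carry a plain `text` field as elsewhere in this module:

    prompt = OpenAIResponsePrompt(
        id="pmpt_abc123",  # hypothetical prompt template identifier
        version="2",
        variables={
            "customer_name": OpenAIResponseInputMessageContentText(text="Jane"),
        },
    )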
 @json_schema_type
 class OpenAIResponseAnnotationFileCitation(BaseModel):
     """File citation annotation for referencing specific files in response content.
@@ -159,7 +203,7 @@ class OpenAIResponseMessage(BaseModel):
     scenarios.
     """

-    content: str | list[OpenAIResponseInputMessageContent] | list[OpenAIResponseOutputMessageContent]
+    content: str | Sequence[OpenAIResponseInputMessageContent] | Sequence[OpenAIResponseOutputMessageContent]
     role: Literal["system"] | Literal["developer"] | Literal["user"] | Literal["assistant"]
     type: Literal["message"] = "message"
@@ -211,10 +255,10 @@ class OpenAIResponseOutputMessageFileSearchToolCall(BaseModel):
     """

     id: str
-    queries: list[str]
+    queries: Sequence[str]
     status: str
     type: Literal["file_search_call"] = "file_search_call"
-    results: list[OpenAIResponseOutputMessageFileSearchToolCallResults] | None = None
+    results: Sequence[OpenAIResponseOutputMessageFileSearchToolCallResults] | None = None


 @json_schema_type
@@ -538,6 +582,7 @@ class OpenAIResponseObject(BaseModel):
     :param output: List of generated output items (messages, tool calls, etc.)
     :param parallel_tool_calls: Whether tool calls can be executed in parallel
     :param previous_response_id: (Optional) ID of the previous response in a conversation
+    :param prompt: (Optional) Reference to a prompt template and its variables.
     :param status: Current status of the response generation
     :param temperature: (Optional) Sampling temperature used for generation
     :param text: Text formatting configuration for the response
@@ -553,16 +598,17 @@ class OpenAIResponseObject(BaseModel):
     id: str
     model: str
     object: Literal["response"] = "response"
-    output: list[OpenAIResponseOutput]
+    output: Sequence[OpenAIResponseOutput]
     parallel_tool_calls: bool = False
     previous_response_id: str | None = None
+    prompt: OpenAIResponsePrompt | None = None
     status: str
     temperature: float | None = None
     # Default to text format to avoid breaking the loading of old responses
     # before the field was added. New responses will have this set always.
     text: OpenAIResponseText = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text"))
     top_p: float | None = None
-    tools: list[OpenAIResponseTool] | None = None
+    tools: Sequence[OpenAIResponseTool] | None = None
     truncation: str | None = None
     usage: OpenAIResponseUsage | None = None
     instructions: str | None = None
@@ -1270,7 +1316,7 @@ class ListOpenAIResponseInputItem(BaseModel):
     :param object: Object type identifier, always "list"
     """

-    data: list[OpenAIResponseInput]
+    data: Sequence[OpenAIResponseInput]
     object: Literal["list"] = "list"
@@ -1281,7 +1327,7 @@ class OpenAIResponseObjectWithInput(OpenAIResponseObject):
     :param input: List of input items that led to this response
     """

-    input: list[OpenAIResponseInput]
+    input: Sequence[OpenAIResponseInput]

     def to_response_object(self) -> OpenAIResponseObject:
         """Convert to OpenAIResponseObject by excluding input field."""
@@ -1299,7 +1345,7 @@ class ListOpenAIResponseObject(BaseModel):
     :param object: Object type identifier, always "list"
     """

-    data: list[OpenAIResponseObjectWithInput]
+    data: Sequence[OpenAIResponseObjectWithInput]
     has_more: bool
     first_id: str
     last_id: str

View file

@@ -43,7 +43,6 @@ class Batches(Protocol):
     Note: This API is currently under active development and may undergo changes.
     """

-    @webmethod(route="/openai/v1/batches", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/batches", method="POST", level=LLAMA_STACK_API_V1)
     async def create_batch(
         self,
@@ -64,7 +63,6 @@ class Batches(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/batches/{batch_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def retrieve_batch(self, batch_id: str) -> BatchObject:
         """Retrieve information about a specific batch.
@@ -74,7 +72,6 @@ class Batches(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/batches/{batch_id}/cancel", method="POST", level=LLAMA_STACK_API_V1)
     async def cancel_batch(self, batch_id: str) -> BatchObject:
         """Cancel a batch that is in progress.
@@ -84,7 +81,6 @@ class Batches(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/batches", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/batches", method="GET", level=LLAMA_STACK_API_V1)
     async def list_batches(
         self,

View file

@@ -8,7 +8,7 @@ from typing import Any, Literal, Protocol, runtime_checkable
 from pydantic import BaseModel, Field

 from llama_stack.apis.resource import Resource, ResourceType
-from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
+from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
 from llama_stack.schema_utils import json_schema_type, webmethod
@@ -54,7 +54,6 @@ class ListBenchmarksResponse(BaseModel):
 @runtime_checkable
 class Benchmarks(Protocol):
-    @webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/eval/benchmarks", method="GET", level=LLAMA_STACK_API_V1ALPHA)
     async def list_benchmarks(self) -> ListBenchmarksResponse:
         """List all benchmarks.
@@ -63,7 +62,6 @@ class Benchmarks(Protocol):
         """
         ...

-    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/eval/benchmarks/{benchmark_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
     async def get_benchmark(
         self,
@@ -76,7 +74,6 @@ class Benchmarks(Protocol):
         """
         ...

-    @webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/eval/benchmarks", method="POST", level=LLAMA_STACK_API_V1ALPHA)
     async def register_benchmark(
         self,
@@ -98,7 +95,6 @@ class Benchmarks(Protocol):
         """
         ...

-    @webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/eval/benchmarks/{benchmark_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
     async def unregister_benchmark(self, benchmark_id: str) -> None:
         """Unregister a benchmark.

View file

@@ -8,7 +8,7 @@ from typing import Any, Protocol, runtime_checkable
 from llama_stack.apis.common.responses import PaginatedResponse
 from llama_stack.apis.datasets import Dataset
-from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1BETA
+from llama_stack.apis.version import LLAMA_STACK_API_V1BETA
 from llama_stack.schema_utils import webmethod
@@ -21,7 +21,6 @@ class DatasetIO(Protocol):
     # keeping for aligning with inference/safety, but this is not used
     dataset_store: DatasetStore

-    @webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET", deprecated=True, level=LLAMA_STACK_API_V1)
     @webmethod(route="/datasetio/iterrows/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1BETA)
     async def iterrows(
         self,
@@ -46,9 +45,6 @@ class DatasetIO(Protocol):
         """
         ...

-    @webmethod(
-        route="/datasetio/append-rows/{dataset_id:path}", method="POST", deprecated=True, level=LLAMA_STACK_API_V1
-    )
     @webmethod(route="/datasetio/append-rows/{dataset_id:path}", method="POST", level=LLAMA_STACK_API_V1BETA)
     async def append_rows(self, dataset_id: str, rows: list[dict[str, Any]]) -> None:
         """Append rows to a dataset.

View file

@@ -10,7 +10,7 @@ from typing import Annotated, Any, Literal, Protocol
 from pydantic import BaseModel, Field

 from llama_stack.apis.resource import Resource, ResourceType
-from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1BETA
+from llama_stack.apis.version import LLAMA_STACK_API_V1BETA
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
@@ -146,7 +146,6 @@ class ListDatasetsResponse(BaseModel):
 class Datasets(Protocol):
-    @webmethod(route="/datasets", method="POST", deprecated=True, level=LLAMA_STACK_API_V1)
     @webmethod(route="/datasets", method="POST", level=LLAMA_STACK_API_V1BETA)
     async def register_dataset(
         self,
@@ -216,7 +215,6 @@ class Datasets(Protocol):
         """
         ...

-    @webmethod(route="/datasets/{dataset_id:path}", method="GET", deprecated=True, level=LLAMA_STACK_API_V1)
     @webmethod(route="/datasets/{dataset_id:path}", method="GET", level=LLAMA_STACK_API_V1BETA)
     async def get_dataset(
         self,
@@ -229,7 +227,6 @@ class Datasets(Protocol):
         """
         ...

-    @webmethod(route="/datasets", method="GET", deprecated=True, level=LLAMA_STACK_API_V1)
     @webmethod(route="/datasets", method="GET", level=LLAMA_STACK_API_V1BETA)
     async def list_datasets(self) -> ListDatasetsResponse:
         """List all datasets.
@@ -238,7 +235,6 @@ class Datasets(Protocol):
         """
         ...

-    @webmethod(route="/datasets/{dataset_id:path}", method="DELETE", deprecated=True, level=LLAMA_STACK_API_V1)
     @webmethod(route="/datasets/{dataset_id:path}", method="DELETE", level=LLAMA_STACK_API_V1BETA)
     async def unregister_dataset(
         self,

View file

@@ -13,7 +13,7 @@ from llama_stack.apis.common.job_types import Job
 from llama_stack.apis.inference import SamplingParams, SystemMessage
 from llama_stack.apis.scoring import ScoringResult
 from llama_stack.apis.scoring_functions import ScoringFnParams
-from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
+from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
 from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
@@ -86,7 +86,6 @@ class Eval(Protocol):
     Llama Stack Evaluation API for running evaluations on model and agent candidates."""

-    @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs", method="POST", level=LLAMA_STACK_API_V1ALPHA)
     async def run_eval(
         self,
@@ -101,9 +100,6 @@ class Eval(Protocol):
         """
         ...

-    @webmethod(
-        route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST", level=LLAMA_STACK_API_V1, deprecated=True
-    )
     @webmethod(route="/eval/benchmarks/{benchmark_id}/evaluations", method="POST", level=LLAMA_STACK_API_V1ALPHA)
     async def evaluate_rows(
         self,
@@ -122,9 +118,6 @@ class Eval(Protocol):
         """
         ...

-    @webmethod(
-        route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
-    )
     @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="GET", level=LLAMA_STACK_API_V1ALPHA)
     async def job_status(self, benchmark_id: str, job_id: str) -> Job:
         """Get the status of a job.
@@ -135,12 +128,6 @@ class Eval(Protocol):
         """
         ...

-    @webmethod(
-        route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}",
-        method="DELETE",
-        level=LLAMA_STACK_API_V1,
-        deprecated=True,
-    )
     @webmethod(route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}", method="DELETE", level=LLAMA_STACK_API_V1ALPHA)
     async def job_cancel(self, benchmark_id: str, job_id: str) -> None:
         """Cancel a job.
@@ -150,12 +137,6 @@ class Eval(Protocol):
         """
         ...

-    @webmethod(
-        route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result",
-        method="GET",
-        level=LLAMA_STACK_API_V1,
-        deprecated=True,
-    )
     @webmethod(
         route="/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result", method="GET", level=LLAMA_STACK_API_V1ALPHA
     )

View file

@@ -110,7 +110,6 @@ class Files(Protocol):
     """

     # OpenAI Files API Endpoints
-    @webmethod(route="/openai/v1/files", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/files", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_upload_file(
         self,
@@ -134,7 +133,6 @@ class Files(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/files", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/files", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_list_files(
         self,
@@ -155,7 +153,6 @@ class Files(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/files/{file_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_retrieve_file(
         self,
@@ -170,7 +167,6 @@ class Files(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/files/{file_id}", method="DELETE", level=LLAMA_STACK_API_V1)
     async def openai_delete_file(
         self,
@@ -183,7 +179,6 @@ class Files(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/files/{file_id}/content", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_retrieve_file_content(
         self,

View file

@@ -1189,7 +1189,6 @@ class InferenceProvider(Protocol):
             raise NotImplementedError("Reranking is not implemented")
         return  # this is so mypy's safe-super rule will consider the method concrete

-    @webmethod(route="/openai/v1/completions", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/completions", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_completion(
         self,
@@ -1202,7 +1201,6 @@ class InferenceProvider(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/chat/completions", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/chat/completions", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_chat_completion(
         self,
@@ -1215,7 +1213,6 @@ class InferenceProvider(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/embeddings", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/embeddings", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_embeddings(
         self,
@@ -1240,7 +1237,6 @@ class Inference(InferenceProvider):
     - Rerank models: these models reorder the documents based on their relevance to a query.
     """

-    @webmethod(route="/openai/v1/chat/completions", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/chat/completions", method="GET", level=LLAMA_STACK_API_V1)
     async def list_chat_completions(
         self,
@@ -1259,9 +1255,6 @@ class Inference(InferenceProvider):
         """
         raise NotImplementedError("List chat completions is not implemented")

-    @webmethod(
-        route="/openai/v1/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
-    )
     @webmethod(route="/chat/completions/{completion_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def get_chat_completion(self, completion_id: str) -> OpenAICompletionWithInputMessages:
         """Get chat completion.

View file

@@ -4,14 +4,21 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-from typing import Protocol, runtime_checkable
+from typing import Literal, Protocol, runtime_checkable

 from pydantic import BaseModel

-from llama_stack.apis.version import LLAMA_STACK_API_V1
+from llama_stack.apis.version import (
+    LLAMA_STACK_API_V1,
+)
 from llama_stack.providers.datatypes import HealthStatus
 from llama_stack.schema_utils import json_schema_type, webmethod

+# Valid values for the route filter parameter.
+# Actual API levels: v1, v1alpha, v1beta (filters by level, excludes deprecated)
+# Special filter value: "deprecated" (shows deprecated routes regardless of level)
+ApiFilter = Literal["v1", "v1alpha", "v1beta", "deprecated"]
+

 @json_schema_type
 class RouteInfo(BaseModel):
@@ -64,11 +71,12 @@ class Inspect(Protocol):
     """

     @webmethod(route="/inspect/routes", method="GET", level=LLAMA_STACK_API_V1)
-    async def list_routes(self) -> ListRoutesResponse:
+    async def list_routes(self, api_filter: ApiFilter | None = None) -> ListRoutesResponse:
         """List routes.

         List all available API routes with their methods and implementing providers.

+        :param api_filter: Optional filter to control which routes are returned. Can be an API level ('v1', 'v1alpha', 'v1beta') to show non-deprecated routes at that level, or 'deprecated' to show deprecated routes across all levels. If not specified, returns only non-deprecated v1 routes.
         :returns: Response containing information about all available routes.
         """
         ...
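
Illustratively, a caller holding an `Inspect` implementation can narrow the listing as follows (`inspect_impl` is a stand-in for whatever concrete implementation is in scope, not a name from this diff):

    # Deprecated routes, regardless of API level:
    deprecated_routes = await inspect_impl.list_routes(api_filter="deprecated")

    # Default behavior (api_filter=None): non-deprecated v1 routes only.
    v1_routes = await inspect_impl.list_routes()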

View file

@@ -90,12 +90,14 @@ class OpenAIModel(BaseModel):
     :object: The object type, which will be "model"
     :created: The Unix timestamp in seconds when the model was created
     :owned_by: The owner of the model
+    :custom_metadata: Llama Stack-specific metadata including model_type, provider info, and additional metadata
     """

     id: str
     object: Literal["model"] = "model"
     created: int
     owned_by: str
+    custom_metadata: dict[str, Any] | None = None


 class OpenAIListModelsResponse(BaseModel):
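
A populated entry might look like the sketch below; the diff only guarantees that `custom_metadata` is an arbitrary dict, so the keys shown are assumptions:

    model = OpenAIModel(
        id="meta-llama/Llama-3.3-70B-Instruct",
        created=1730000000,
        owned_by="llama_stack",
        custom_metadata={"model_type": "llm", "provider_id": "together"},  # illustrative keys
    )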
@@ -105,7 +107,6 @@ class OpenAIListModelsResponse(BaseModel):
 @runtime_checkable
 @trace_protocol
 class Models(Protocol):
-    @webmethod(route="/models", method="GET", level=LLAMA_STACK_API_V1)
     async def list_models(self) -> ListModelsResponse:
         """List all models.
@@ -113,7 +114,7 @@ class Models(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/models", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
+    @webmethod(route="/models", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_list_models(self) -> OpenAIListModelsResponse:
         """List models using the OpenAI API.
View file

@ -13,7 +13,7 @@ from pydantic import BaseModel, Field
from llama_stack.apis.common.content_types import URL from llama_stack.apis.common.content_types import URL
from llama_stack.apis.common.job_types import JobStatus from llama_stack.apis.common.job_types import JobStatus
from llama_stack.apis.common.training_types import Checkpoint from llama_stack.apis.common.training_types import Checkpoint
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA from llama_stack.apis.version import LLAMA_STACK_API_V1ALPHA
from llama_stack.schema_utils import json_schema_type, register_schema, webmethod from llama_stack.schema_utils import json_schema_type, register_schema, webmethod
@ -284,7 +284,6 @@ class PostTrainingJobArtifactsResponse(BaseModel):
class PostTraining(Protocol): class PostTraining(Protocol):
@webmethod(route="/post-training/supervised-fine-tune", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/post-training/supervised-fine-tune", method="POST", level=LLAMA_STACK_API_V1ALPHA) @webmethod(route="/post-training/supervised-fine-tune", method="POST", level=LLAMA_STACK_API_V1ALPHA)
async def supervised_fine_tune( async def supervised_fine_tune(
self, self,
@ -312,7 +311,6 @@ class PostTraining(Protocol):
""" """
... ...
@webmethod(route="/post-training/preference-optimize", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/post-training/preference-optimize", method="POST", level=LLAMA_STACK_API_V1ALPHA) @webmethod(route="/post-training/preference-optimize", method="POST", level=LLAMA_STACK_API_V1ALPHA)
async def preference_optimize( async def preference_optimize(
self, self,
@ -335,7 +333,6 @@ class PostTraining(Protocol):
""" """
... ...
@webmethod(route="/post-training/jobs", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/post-training/jobs", method="GET", level=LLAMA_STACK_API_V1ALPHA) @webmethod(route="/post-training/jobs", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def get_training_jobs(self) -> ListPostTrainingJobsResponse: async def get_training_jobs(self) -> ListPostTrainingJobsResponse:
"""Get all training jobs. """Get all training jobs.
@ -344,7 +341,6 @@ class PostTraining(Protocol):
""" """
... ...
@webmethod(route="/post-training/job/status", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/post-training/job/status", method="GET", level=LLAMA_STACK_API_V1ALPHA) @webmethod(route="/post-training/job/status", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def get_training_job_status(self, job_uuid: str) -> PostTrainingJobStatusResponse: async def get_training_job_status(self, job_uuid: str) -> PostTrainingJobStatusResponse:
"""Get the status of a training job. """Get the status of a training job.
@ -354,7 +350,6 @@ class PostTraining(Protocol):
""" """
... ...
@webmethod(route="/post-training/job/cancel", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/post-training/job/cancel", method="POST", level=LLAMA_STACK_API_V1ALPHA) @webmethod(route="/post-training/job/cancel", method="POST", level=LLAMA_STACK_API_V1ALPHA)
async def cancel_training_job(self, job_uuid: str) -> None: async def cancel_training_job(self, job_uuid: str) -> None:
"""Cancel a training job. """Cancel a training job.
@ -363,7 +358,6 @@ class PostTraining(Protocol):
""" """
... ...
@webmethod(route="/post-training/job/artifacts", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
@webmethod(route="/post-training/job/artifacts", method="GET", level=LLAMA_STACK_API_V1ALPHA) @webmethod(route="/post-training/job/artifacts", method="GET", level=LLAMA_STACK_API_V1ALPHA)
async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse: async def get_training_job_artifacts(self, job_uuid: str) -> PostTrainingJobArtifactsResponse:
"""Get the artifacts of a training job. """Get the artifacts of a training job.

View file

@@ -121,7 +121,6 @@ class Safety(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/moderations", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/moderations", method="POST", level=LLAMA_STACK_API_V1)
     async def run_moderation(self, input: str | list[str], model: str | None = None) -> ModerationObject:
         """Create moderation.

View file

@@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .synthetic_data_generation import *

View file

@@ -1,77 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from enum import Enum
-from typing import Any, Protocol
-
-from pydantic import BaseModel
-
-from llama_stack.apis.inference import Message
-from llama_stack.apis.version import LLAMA_STACK_API_V1
-from llama_stack.schema_utils import json_schema_type, webmethod
-
-
-class FilteringFunction(Enum):
-    """The type of filtering function.
-
-    :cvar none: No filtering applied, accept all generated synthetic data
-    :cvar random: Random sampling of generated data points
-    :cvar top_k: Keep only the top-k highest scoring synthetic data samples
-    :cvar top_p: Nucleus-style filtering, keep samples exceeding cumulative score threshold
-    :cvar top_k_top_p: Combined top-k and top-p filtering strategy
-    :cvar sigmoid: Apply sigmoid function for probability-based filtering
-    """
-
-    none = "none"
-    random = "random"
-    top_k = "top_k"
-    top_p = "top_p"
-    top_k_top_p = "top_k_top_p"
-    sigmoid = "sigmoid"
-
-
-@json_schema_type
-class SyntheticDataGenerationRequest(BaseModel):
-    """Request to generate synthetic data. A small batch of prompts and a filtering function
-
-    :param dialogs: List of conversation messages to use as input for synthetic data generation
-    :param filtering_function: Type of filtering to apply to generated synthetic data samples
-    :param model: (Optional) The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint
-    """
-
-    dialogs: list[Message]
-    filtering_function: FilteringFunction = FilteringFunction.none
-    model: str | None = None
-
-
-@json_schema_type
-class SyntheticDataGenerationResponse(BaseModel):
-    """Response from the synthetic data generation. Batch of (prompt, response, score) tuples that pass the threshold.
-
-    :param synthetic_data: List of generated synthetic data samples that passed the filtering criteria
-    :param statistics: (Optional) Statistical information about the generation process and filtering results
-    """
-
-    synthetic_data: list[dict[str, Any]]
-    statistics: dict[str, Any] | None = None
-
-
-class SyntheticDataGeneration(Protocol):
-    @webmethod(route="/synthetic-data-generation/generate", level=LLAMA_STACK_API_V1)
-    def synthetic_data_generate(
-        self,
-        dialogs: list[Message],
-        filtering_function: FilteringFunction = FilteringFunction.none,
-        model: str | None = None,
-    ) -> SyntheticDataGenerationResponse:
-        """Generate synthetic data based on input dialogs and apply filtering.
-
-        :param dialogs: List of conversation messages to use as input for synthetic data generation
-        :param filtering_function: Type of filtering to apply to generated synthetic data samples
-        :param model: (Optional) The identifier of the model to use. The model must be registered with Llama Stack and available via the /models endpoint
-        :returns: Response containing filtered synthetic data samples and optional statistics
-        """
-        ...

View file

@@ -8,7 +8,6 @@
 #
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

-import uuid
 from typing import Annotated, Any, Literal, Protocol, runtime_checkable

 from fastapi import Body
@@ -18,7 +17,6 @@ from llama_stack.apis.inference import InterleavedContent
 from llama_stack.apis.vector_stores import VectorStore
 from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.core.telemetry.trace_protocol import trace_protocol
-from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
 from llama_stack.schema_utils import json_schema_type, webmethod
 from llama_stack.strong_typing.schema import register_schema
@@ -61,38 +59,19 @@ class Chunk(BaseModel):
     """
     A chunk of content that can be inserted into a vector database.

     :param content: The content of the chunk, which can be interleaved text, images, or other types.
-    :param embedding: Optional embedding for the chunk. If not provided, it will be computed later.
+    :param chunk_id: Unique identifier for the chunk. Must be provided explicitly.
     :param metadata: Metadata associated with the chunk that will be used in the model context during inference.
-    :param stored_chunk_id: The chunk ID that is stored in the vector database. Used for backend functionality.
+    :param embedding: Optional embedding for the chunk. If not provided, it will be computed later.
     :param chunk_metadata: Metadata for the chunk that will NOT be used in the context during inference.
         The `chunk_metadata` is required backend functionality.
     """

     content: InterleavedContent
+    chunk_id: str
     metadata: dict[str, Any] = Field(default_factory=dict)
     embedding: list[float] | None = None
-    # The alias parameter serializes the field as "chunk_id" in JSON but keeps the internal name as "stored_chunk_id"
-    stored_chunk_id: str | None = Field(default=None, alias="chunk_id")
     chunk_metadata: ChunkMetadata | None = None

-    model_config = {"populate_by_name": True}
-
-    def model_post_init(self, __context):
-        # Extract chunk_id from metadata if present
-        if self.metadata and "chunk_id" in self.metadata:
-            self.stored_chunk_id = self.metadata.pop("chunk_id")
-
-    @property
-    def chunk_id(self) -> str:
-        """Returns the chunk ID, which is either an input `chunk_id` or a generated one if not set."""
-        if self.stored_chunk_id:
-            return self.stored_chunk_id
-        if "document_id" in self.metadata:
-            return generate_chunk_id(self.metadata["document_id"], str(self.content))
-        return generate_chunk_id(str(uuid.uuid4()), str(self.content))
-
     @property
     def document_id(self) -> str | None:
         """Returns the document_id from either metadata or chunk_metadata, with metadata taking precedence."""
@@ -566,7 +545,6 @@ class VectorIO(Protocol):
         ...

     # OpenAI Vector Stores API endpoints
-    @webmethod(route="/openai/v1/vector_stores", method="POST", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/vector_stores", method="POST", level=LLAMA_STACK_API_V1)
     async def openai_create_vector_store(
         self,
@@ -579,7 +557,6 @@ class VectorIO(Protocol):
         """
         ...

-    @webmethod(route="/openai/v1/vector_stores", method="GET", level=LLAMA_STACK_API_V1, deprecated=True)
     @webmethod(route="/vector_stores", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_list_vector_stores(
         self,
@@ -598,9 +575,6 @@ class VectorIO(Protocol):
         """
         ...

-    @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1, deprecated=True
-    )
     @webmethod(route="/vector_stores/{vector_store_id}", method="GET", level=LLAMA_STACK_API_V1)
     async def openai_retrieve_vector_store(
         self,
@@ -613,9 +587,6 @@ class VectorIO(Protocol):
         """
         ...

-    @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}", method="POST", level=LLAMA_STACK_API_V1, deprecated=True
-    )
     @webmethod(
         route="/vector_stores/{vector_store_id}",
         method="POST",
@@ -638,9 +609,6 @@ class VectorIO(Protocol):
         """
         ...

-    @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}", method="DELETE", level=LLAMA_STACK_API_V1, deprecated=True
-    )
     @webmethod(
         route="/vector_stores/{vector_store_id}",
         method="DELETE",
@@ -657,12 +625,6 @@ class VectorIO(Protocol):
         """
         ...

-    @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}/search",
-        method="POST",
-        level=LLAMA_STACK_API_V1,
-        deprecated=True,
-    )
     @webmethod(
         route="/vector_stores/{vector_store_id}/search",
         method="POST",
@@ -695,12 +657,6 @@ class VectorIO(Protocol):
         """
         ...

-    @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}/files",
-        method="POST",
-        level=LLAMA_STACK_API_V1,
-        deprecated=True,
-    )
     @webmethod(
         route="/vector_stores/{vector_store_id}/files",
         method="POST",
@@ -723,12 +679,6 @@ class VectorIO(Protocol):
         """
         ...

-    @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}/files",
-        method="GET",
-        level=LLAMA_STACK_API_V1,
-        deprecated=True,
-    )
     @webmethod(
         route="/vector_stores/{vector_store_id}/files",
         method="GET",
@@ -755,12 +705,6 @@ class VectorIO(Protocol):
         """
         ...

-    @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}",
-        method="GET",
-        level=LLAMA_STACK_API_V1,
-        deprecated=True,
-    )
     @webmethod(
         route="/vector_stores/{vector_store_id}/files/{file_id}",
         method="GET",
@@ -779,12 +723,6 @@ class VectorIO(Protocol):
         """
         ...

-    @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}/content",
-        method="GET",
-        level=LLAMA_STACK_API_V1,
-        deprecated=True,
-    )
     @webmethod(
         route="/vector_stores/{vector_store_id}/files/{file_id}/content",
         method="GET",
@@ -803,12 +741,6 @@ class VectorIO(Protocol):
         """
         ...

-    @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}",
-        method="POST",
-        level=LLAMA_STACK_API_V1,
-        deprecated=True,
-    )
     @webmethod(
         route="/vector_stores/{vector_store_id}/files/{file_id}",
         method="POST",
@@ -829,12 +761,6 @@ class VectorIO(Protocol):
         """
         ...

-    @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}/files/{file_id}",
-        method="DELETE",
-        level=LLAMA_STACK_API_V1,
-        deprecated=True,
-    )
     @webmethod(
         route="/vector_stores/{vector_store_id}/files/{file_id}",
         method="DELETE",
@@ -858,12 +784,6 @@ class VectorIO(Protocol):
         method="POST",
         level=LLAMA_STACK_API_V1,
     )
-    @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}/file_batches",
-        method="POST",
-        level=LLAMA_STACK_API_V1,
-        deprecated=True,
-    )
     async def openai_create_vector_store_file_batch(
         self,
         vector_store_id: str,
@@ -882,12 +802,6 @@ class VectorIO(Protocol):
         method="GET",
         level=LLAMA_STACK_API_V1,
     )
-    @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}",
-        method="GET",
-        level=LLAMA_STACK_API_V1,
-        deprecated=True,
-    )
     async def openai_retrieve_vector_store_file_batch(
         self,
         batch_id: str,
@@ -901,12 +815,6 @@ class VectorIO(Protocol):
         """
         ...

-    @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
-        method="GET",
-        level=LLAMA_STACK_API_V1,
-        deprecated=True,
-    )
     @webmethod(
         route="/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
         method="GET",
@@ -935,12 +843,6 @@ class VectorIO(Protocol):
         """
         ...

-    @webmethod(
-        route="/openai/v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
-        method="POST",
-        level=LLAMA_STACK_API_V1,
-        deprecated=True,
-    )
     @webmethod(
         route="/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
         method="POST",

View file

@@ -8,16 +8,30 @@ import argparse
 import os
 import ssl
 import subprocess
+import sys
 from pathlib import Path

 import uvicorn
 import yaml
+from termcolor import cprint

 from llama_stack.cli.stack.utils import ImageType
 from llama_stack.cli.subcommand import Subcommand
-from llama_stack.core.datatypes import StackRunConfig
+from llama_stack.core.datatypes import Api, Provider, StackRunConfig
+from llama_stack.core.distribution import get_provider_registry
 from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars
+from llama_stack.core.storage.datatypes import (
+    InferenceStoreReference,
+    KVStoreReference,
+    ServerStoresConfig,
+    SqliteKVStoreConfig,
+    SqliteSqlStoreConfig,
+    SqlStoreReference,
+    StorageConfig,
+)
+from llama_stack.core.utils.config_dirs import DISTRIBS_BASE_DIR
 from llama_stack.core.utils.config_resolution import Mode, resolve_config_or_distro
+from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.log import LoggingConfig, get_logger

 REPO_ROOT = Path(__file__).parent.parent.parent.parent
@@ -68,6 +82,12 @@ class StackRun(Subcommand):
             action="store_true",
             help="Start the UI server",
         )
+        self.parser.add_argument(
+            "--providers",
+            type=str,
+            default=None,
+            help="Run a stack with only a list of providers. This list is formatted like: api1=provider1,api1=provider2,api2=provider3. Where there can be multiple providers per API.",
+        )
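
In practice the new flag is invoked like this (the provider names are illustrative; any provider type registered for the given API should work):

    llama stack run --providers inference=remote::ollama,vector_io=inline::faiss

Each `api=provider_type` pair is resolved against the provider registry, a run config is synthesized under ~/.llama/distributions/providers-run, and the server starts from that generated file.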
     def _run_stack_run_cmd(self, args: argparse.Namespace) -> None:
         import yaml
@@ -93,6 +113,55 @@ class StackRun(Subcommand):
                 config_file = resolve_config_or_distro(args.config, Mode.RUN)
             except ValueError as e:
                 self.parser.error(str(e))
+        elif args.providers:
+            provider_list: dict[str, list[Provider]] = dict()
+            for api_provider in args.providers.split(","):
+                if "=" not in api_provider:
+                    cprint(
+                        "Could not parse `--providers`. Please ensure the list is in the format api1=provider1,api2=provider2",
+                        color="red",
+                        file=sys.stderr,
+                    )
+                    sys.exit(1)
+                api, provider_type = api_provider.split("=")
+                providers_for_api = get_provider_registry().get(Api(api), None)
+                if providers_for_api is None:
+                    cprint(
+                        f"{api} is not a valid API.",
+                        color="red",
+                        file=sys.stderr,
+                    )
+                    sys.exit(1)
+                if provider_type in providers_for_api:
+                    config_type = instantiate_class_type(providers_for_api[provider_type].config_class)
+                    if config_type is not None and hasattr(config_type, "sample_run_config"):
+                        config = config_type.sample_run_config(__distro_dir__="~/.llama/distributions/providers-run")
+                    else:
+                        config = {}
+                    provider = Provider(
+                        provider_type=provider_type,
+                        config=config,
+                        provider_id=provider_type.split("::")[1],
+                    )
+                    provider_list.setdefault(api, []).append(provider)
+                else:
+                    cprint(
+                        f"{provider_type} is not a valid provider for the {api} API.",
+                        color="red",
+                        file=sys.stderr,
+                    )
+                    sys.exit(1)
+
+            run_config = self._generate_run_config_from_providers(providers=provider_list)
+            config_dict = run_config.model_dump(mode="json")
+
+            # Write config to disk in providers-run directory
+            distro_dir = DISTRIBS_BASE_DIR / "providers-run"
+            config_file = distro_dir / "run.yaml"
+            logger.info(f"Writing generated config to: {config_file}")
+            with open(config_file, "w") as f:
+                yaml.dump(config_dict, f, default_flow_style=False, sort_keys=False)
         else:
             config_file = None
@@ -106,7 +175,8 @@ class StackRun(Subcommand):
             try:
                 config = parse_and_maybe_upgrade_config(config_dict)
-                if not os.path.exists(str(config.external_providers_dir)):
+                # Create external_providers_dir if it's specified and doesn't exist
+                if config.external_providers_dir and not os.path.exists(str(config.external_providers_dir)):
                     os.makedirs(str(config.external_providers_dir), exist_ok=True)
             except AttributeError as e:
                 self.parser.error(f"failed to parse config file '{config_file}':\n {e}")
@@ -127,7 +197,7 @@ class StackRun(Subcommand):
             config = StackRunConfig(**cast_image_name_to_string(replace_env_vars(config_contents)))
         port = args.port or config.server.port
-        host = config.server.host or ["::", "0.0.0.0"]
+        host = config.server.host or "0.0.0.0"

         # Set the config file in environment so create_app can find it
         os.environ["LLAMA_STACK_CONFIG"] = str(config_file)
@@ -139,6 +209,7 @@ class StackRun(Subcommand):
             "lifespan": "on",
             "log_level": logger.getEffectiveLevel(),
             "log_config": logger_config,
+            "workers": config.server.workers,
         }

         keyfile = config.server.tls_keyfile
@@ -168,7 +239,7 @@ class StackRun(Subcommand):
         # Another approach would be to ignore SIGINT entirely - let uvicorn handle it through its own
         # signal handling but this is quite intrusive and not worth the effort.
         try:
-            uvicorn.run("llama_stack.core.server.server:create_app", **uvicorn_config)
+            uvicorn.run("llama_stack.core.server.server:create_app", **uvicorn_config)  # type: ignore[arg-type]
         except (KeyboardInterrupt, SystemExit):
             logger.info("Received interrupt signal, shutting down gracefully...")
@@ -212,3 +283,44 @@ class StackRun(Subcommand):
             )
         except Exception as e:
             logger.error(f"Failed to start UI development server in {ui_dir}: {e}")
+
+    def _generate_run_config_from_providers(self, providers: dict[str, list[Provider]]):
+        apis = list(providers.keys())
+        distro_dir = DISTRIBS_BASE_DIR / "providers-run"
+        # need somewhere to put the storage.
+        os.makedirs(distro_dir, exist_ok=True)
+        storage = StorageConfig(
+            backends={
+                "kv_default": SqliteKVStoreConfig(
+                    db_path=f"${{env.SQLITE_STORE_DIR:={distro_dir}}}/kvstore.db",
+                ),
+                "sql_default": SqliteSqlStoreConfig(
+                    db_path=f"${{env.SQLITE_STORE_DIR:={distro_dir}}}/sql_store.db",
+                ),
+            },
+            stores=ServerStoresConfig(
+                metadata=KVStoreReference(
+                    backend="kv_default",
+                    namespace="registry",
+                ),
+                inference=InferenceStoreReference(
+                    backend="sql_default",
+                    table_name="inference_store",
+                ),
+                conversations=SqlStoreReference(
+                    backend="sql_default",
+                    table_name="openai_conversations",
+                ),
+                prompts=KVStoreReference(
+                    backend="kv_default",
+                    namespace="prompts",
+                ),
+            ),
+        )
+        return StackRunConfig(
+            image_name="providers-run",
+            apis=apis,
+            providers=providers,
+            storage=storage,
+        )

View file

@@ -17,7 +17,6 @@ from llama_stack.core.distribution import (
     get_provider_registry,
 )
 from llama_stack.core.stack import cast_image_name_to_string, replace_env_vars
-from llama_stack.core.utils.config_dirs import EXTERNAL_PROVIDERS_DIR
 from llama_stack.core.utils.dynamic import instantiate_class_type
 from llama_stack.core.utils.prompt_for_config import prompt_for_config
 from llama_stack.log import get_logger
@@ -194,19 +193,11 @@ def upgrade_from_routing_table(


 def parse_and_maybe_upgrade_config(config_dict: dict[str, Any]) -> StackRunConfig:
-    version = config_dict.get("version", None)
-    if version == LLAMA_STACK_RUN_CONFIG_VERSION:
-        processed_config_dict = replace_env_vars(config_dict)
-        return StackRunConfig(**cast_image_name_to_string(processed_config_dict))
-
     if "routing_table" in config_dict:
         logger.info("Upgrading config...")
         config_dict = upgrade_from_routing_table(config_dict)

     config_dict["version"] = LLAMA_STACK_RUN_CONFIG_VERSION

-    if not config_dict.get("external_providers_dir", None):
-        config_dict["external_providers_dir"] = EXTERNAL_PROVIDERS_DIR
-
     processed_config_dict = replace_env_vars(config_dict)
     return StackRunConfig(**cast_image_name_to_string(processed_config_dict))

View file

@@ -473,6 +473,10 @@ class ServerConfig(BaseModel):
         "- true: Enable localhost CORS for development\n"
         "- {allow_origins: [...], allow_methods: [...], ...}: Full configuration",
     )
+    workers: int = Field(
+        default=1,
+        description="Number of workers to use for the server",
+    )


 class StackRunConfig(BaseModel):
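
Since `workers` feeds straight into the `uvicorn_config` dict built by `StackRun` above, multi-process serving becomes a one-field change. A minimal sketch of the equivalent in-code construction (field values illustrative; other `ServerConfig` fields are assumed to have defaults):

    config = ServerConfig(port=8321, workers=4)
    assert config.workers == 4  # forwarded as uvicorn.run(..., workers=4)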

View file

@@ -15,6 +15,7 @@ from llama_stack.apis.inspect import (
     RouteInfo,
     VersionInfo,
 )
+from llama_stack.apis.version import LLAMA_STACK_API_V1
 from llama_stack.core.datatypes import StackRunConfig
 from llama_stack.core.external import load_external_apis
 from llama_stack.core.server.routes import get_all_api_routes
@@ -39,9 +40,21 @@ class DistributionInspectImpl(Inspect):
     async def initialize(self) -> None:
         pass

-    async def list_routes(self) -> ListRoutesResponse:
+    async def list_routes(self, api_filter: str | None = None) -> ListRoutesResponse:
         run_config: StackRunConfig = self.config.run_config

+        # Helper function to determine if a route should be included based on api_filter
+        def should_include_route(webmethod) -> bool:
+            if api_filter is None:
+                # Default: only non-deprecated v1 APIs
+                return not webmethod.deprecated and webmethod.level == LLAMA_STACK_API_V1
+            elif api_filter == "deprecated":
+                # Special filter: show deprecated routes regardless of their actual level
+                return bool(webmethod.deprecated)
+            else:
+                # Filter by API level (non-deprecated routes only)
+                return not webmethod.deprecated and webmethod.level == api_filter
+
         ret = []
         external_apis = load_external_apis(run_config)
         all_endpoints = get_all_api_routes(external_apis)
@@ -55,8 +68,8 @@ class DistributionInspectImpl(Inspect):
                         method=next(iter([m for m in e.methods if m != "HEAD"])),
                         provider_types=[],  # These APIs don't have "real" providers - they're internal to the stack
                     )
-                    for e, _ in endpoints
-                    if e.methods is not None
+                    for e, webmethod in endpoints
+                    if e.methods is not None and should_include_route(webmethod)
                 ]
             )
         else:
@@ -69,8 +82,8 @@ class DistributionInspectImpl(Inspect):
                         method=next(iter([m for m in e.methods if m != "HEAD"])),
                         provider_types=[p.provider_type for p in providers],
                     )
-                    for e, _ in endpoints
-                    if e.methods is not None
+                    for e, webmethod in endpoints
+                    if e.methods is not None and should_include_route(webmethod)
                 ]
             )

View file

@@ -6,7 +6,7 @@

import asyncio
import time
-from collections.abc import AsyncGenerator, AsyncIterator
+from collections.abc import AsyncIterator
from datetime import UTC, datetime
from typing import Annotated, Any
@@ -15,20 +15,10 @@ from openai.types.chat import ChatCompletionToolChoiceOptionParam as OpenAIChatC
from openai.types.chat import ChatCompletionToolParam as OpenAIChatCompletionToolParam
from pydantic import TypeAdapter

-from llama_stack.apis.common.content_types import (
-    InterleavedContent,
-)
from llama_stack.apis.common.errors import ModelNotFoundError, ModelTypeError
from llama_stack.apis.inference import (
-    ChatCompletionResponse,
-    ChatCompletionResponseEventType,
-    ChatCompletionResponseStreamChunk,
-    CompletionMessage,
-    CompletionResponse,
-    CompletionResponseStreamChunk,
    Inference,
    ListOpenAIChatCompletionResponse,
-    Message,
    OpenAIAssistantMessageParam,
    OpenAIChatCompletion,
    OpenAIChatCompletionChunk,
@@ -45,15 +35,13 @@ from llama_stack.apis.inference import (
    OpenAIMessageParam,
    Order,
    RerankResponse,
-    StopReason,
-    ToolPromptFormat,
)
from llama_stack.apis.inference.inference import (
    OpenAIChatCompletionContentPartImageParam,
    OpenAIChatCompletionContentPartTextParam,
)
-from llama_stack.apis.models import Model, ModelType
-from llama_stack.core.telemetry.telemetry import MetricEvent, MetricInResponse
+from llama_stack.apis.models import ModelType
+from llama_stack.core.telemetry.telemetry import MetricEvent
from llama_stack.core.telemetry.tracing import enqueue_event, get_current_span
from llama_stack.log import get_logger
from llama_stack.models.llama.llama3.chat_format import ChatFormat
@@ -110,7 +98,8 @@ class InferenceRouter(Inference):
        prompt_tokens: int,
        completion_tokens: int,
        total_tokens: int,
-        model: Model,
+        fully_qualified_model_id: str,
+        provider_id: str,
    ) -> list[MetricEvent]:
        """Constructs a list of MetricEvent objects containing token usage metrics.
@@ -118,7 +107,8 @@ class InferenceRouter(Inference):
            prompt_tokens: Number of tokens in the prompt
            completion_tokens: Number of tokens in the completion
            total_tokens: Total number of tokens used
-            model: Model object containing model_id and provider_id
+            fully_qualified_model_id:
+            provider_id: The provider identifier

        Returns:
            List of MetricEvent objects with token usage metrics
@@ -144,48 +134,32 @@ class InferenceRouter(Inference):
                    timestamp=datetime.now(UTC),
                    unit="tokens",
                    attributes={
-                        "model_id": model.model_id,
-                        "provider_id": model.provider_id,
+                        "model_id": fully_qualified_model_id,
+                        "provider_id": provider_id,
                    },
                )
            )
        return metric_events
-    async def _compute_and_log_token_usage(
-        self,
-        prompt_tokens: int,
-        completion_tokens: int,
-        total_tokens: int,
-        model: Model,
-    ) -> list[MetricInResponse]:
-        metrics = self._construct_metrics(prompt_tokens, completion_tokens, total_tokens, model)
-        if self.telemetry_enabled:
-            for metric in metrics:
-                enqueue_event(metric)
-        return [MetricInResponse(metric=metric.metric, value=metric.value) for metric in metrics]
-
-    async def _count_tokens(
-        self,
-        messages: list[Message] | InterleavedContent,
-        tool_prompt_format: ToolPromptFormat | None = None,
-    ) -> int | None:
-        if not hasattr(self, "formatter") or self.formatter is None:
-            return None
-        if isinstance(messages, list):
-            encoded = self.formatter.encode_dialog_prompt(messages, tool_prompt_format)
-        else:
-            encoded = self.formatter.encode_content(messages)
-        return len(encoded.tokens) if encoded and encoded.tokens else 0
-
-    async def _get_model(self, model_id: str, expected_model_type: str) -> Model:
-        """takes a model id and gets model after ensuring that it is accessible and of the correct type"""
-        model = await self.routing_table.get_model(model_id)
-        if model is None:
-            raise ModelNotFoundError(model_id)
-        if model.model_type != expected_model_type:
-            raise ModelTypeError(model_id, model.model_type, expected_model_type)
-        return model
+    async def _get_model_provider(self, model_id: str, expected_model_type: str) -> tuple[Inference, str]:
+        model = await self.routing_table.get_object_by_identifier("model", model_id)
+        if model:
+            if model.model_type != expected_model_type:
+                raise ModelTypeError(model_id, model.model_type, expected_model_type)
+
+            provider = await self.routing_table.get_provider_impl(model.identifier)
+            return provider, model.provider_resource_id
+
+        splits = model_id.split("/", maxsplit=1)
+        if len(splits) != 2:
+            raise ModelNotFoundError(model_id)
+        provider_id, provider_resource_id = splits
+        if provider_id not in self.routing_table.impls_by_provider_id:
+            logger.warning(f"Provider {provider_id} not found for model {model_id}")
+            raise ModelNotFoundError(model_id)
+        return self.routing_table.impls_by_provider_id[provider_id], provider_resource_id
    async def rerank(
        self,
@@ -195,14 +169,8 @@ class InferenceRouter(Inference):
        max_num_results: int | None = None,
    ) -> RerankResponse:
        logger.debug(f"InferenceRouter.rerank: {model}")
-        model_obj = await self._get_model(model, ModelType.rerank)
-        provider = await self.routing_table.get_provider_impl(model_obj.identifier)
-        return await provider.rerank(
-            model=model_obj.identifier,
-            query=query,
-            items=items,
-            max_num_results=max_num_results,
-        )
+        provider, provider_resource_id = await self._get_model_provider(model, ModelType.rerank)
+        return await provider.rerank(provider_resource_id, query, items, max_num_results)
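
Note how `rerank` now leans entirely on `_get_model_provider`: when a model id is not in the registry, the `provider_id/provider_resource_id` fallback above still resolves it. A minimal sketch of just that parsing step, with a hypothetical provider table and model id:

```python
# Illustrates the registry-miss fallback only; the error class, provider table,
# and model id below are stand-ins for the real objects.
class ModelNotFoundError(Exception):
    pass

impls_by_provider_id = {"together": object()}  # hypothetical provider impls

def resolve(model_id: str):
    splits = model_id.split("/", maxsplit=1)
    if len(splits) != 2:
        raise ModelNotFoundError(model_id)
    provider_id, provider_resource_id = splits
    if provider_id not in impls_by_provider_id:
        raise ModelNotFoundError(model_id)
    return impls_by_provider_id[provider_id], provider_resource_id

_, resource = resolve("together/meta-llama/Llama-3.3-70B-Instruct")
assert resource == "meta-llama/Llama-3.3-70B-Instruct"  # maxsplit=1 keeps the rest intact
```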
    async def openai_completion(
        self,
@@ -211,24 +179,24 @@ class InferenceRouter(Inference):
        logger.debug(
            f"InferenceRouter.openai_completion: model={params.model}, stream={params.stream}, prompt={params.prompt}",
        )
-        model_obj = await self._get_model(params.model, ModelType.llm)
-
-        # Update params with the resolved model identifier
-        params.model = model_obj.identifier
-        provider = await self.routing_table.get_provider_impl(model_obj.identifier)
+        request_model_id = params.model
+        provider, provider_resource_id = await self._get_model_provider(params.model, ModelType.llm)
+        params.model = provider_resource_id

        if params.stream:
            return await provider.openai_completion(params)
        # TODO: Metrics do NOT work with openai_completion stream=True due to the fact
        # that we do not return an AsyncIterator, our tests expect a stream of chunks we cannot intercept currently.

        response = await provider.openai_completion(params)
+        response.model = request_model_id
        if self.telemetry_enabled:
            metrics = self._construct_metrics(
                prompt_tokens=response.usage.prompt_tokens,
                completion_tokens=response.usage.completion_tokens,
                total_tokens=response.usage.total_tokens,
-                model=model_obj,
+                fully_qualified_model_id=request_model_id,
+                provider_id=provider.__provider_id__,
            )
            for metric in metrics:
                enqueue_event(metric)
@@ -246,7 +214,9 @@ class InferenceRouter(Inference):
        logger.debug(
            f"InferenceRouter.openai_chat_completion: model={params.model}, stream={params.stream}, messages={params.messages}",
        )
-        model_obj = await self._get_model(params.model, ModelType.llm)
+        request_model_id = params.model
+        provider, provider_resource_id = await self._get_model_provider(params.model, ModelType.llm)
+        params.model = provider_resource_id

        # Use the OpenAI client for a bit of extra input validation without
        # exposing the OpenAI client itself as part of our API surface
@@ -264,10 +234,6 @@ class InferenceRouter(Inference):
            params.tool_choice = None
            params.tools = None

-        # Update params with the resolved model identifier
-        params.model = model_obj.identifier
-        provider = await self.routing_table.get_provider_impl(model_obj.identifier)
-
        if params.stream:
            response_stream = await provider.openai_chat_completion(params)
@@ -275,11 +241,13 @@ class InferenceRouter(Inference):
            # We need to add metrics to each chunk and store the final completion
            return self.stream_tokens_and_compute_metrics_openai_chat(
                response=response_stream,
-                model=model_obj,
+                fully_qualified_model_id=request_model_id,
+                provider_id=provider.__provider_id__,
                messages=params.messages,
            )

        response = await self._nonstream_openai_chat_completion(provider, params)
+        response.model = request_model_id

        # Store the response with the ID that will be returned to the client
        if self.store:
@@ -290,7 +258,8 @@ class InferenceRouter(Inference):
                prompt_tokens=response.usage.prompt_tokens,
                completion_tokens=response.usage.completion_tokens,
                total_tokens=response.usage.total_tokens,
-                model=model_obj,
+                fully_qualified_model_id=request_model_id,
+                provider_id=provider.__provider_id__,
            )
            for metric in metrics:
                enqueue_event(metric)
@@ -307,13 +276,13 @@ class InferenceRouter(Inference):
        logger.debug(
            f"InferenceRouter.openai_embeddings: model={params.model}, input_type={type(params.input)}, encoding_format={params.encoding_format}, dimensions={params.dimensions}",
        )
-        model_obj = await self._get_model(params.model, ModelType.embedding)
-
-        # Update model to use resolved identifier
-        params.model = model_obj.identifier
-
-        provider = await self.routing_table.get_provider_impl(model_obj.identifier)
-        return await provider.openai_embeddings(params)
+        request_model_id = params.model
+        provider, provider_resource_id = await self._get_model_provider(params.model, ModelType.embedding)
+        params.model = provider_resource_id
+
+        response = await provider.openai_embeddings(params)
+        response.model = request_model_id
+        return response
    async def list_chat_completions(
        self,
@@ -365,119 +334,11 @@ class InferenceRouter(Inference):
            )
        return health_statuses
-    async def stream_tokens_and_compute_metrics(
-        self,
-        response,
-        prompt_tokens,
-        model,
-        tool_prompt_format: ToolPromptFormat | None = None,
-    ) -> AsyncGenerator[ChatCompletionResponseStreamChunk, None] | AsyncGenerator[CompletionResponseStreamChunk, None]:
-        completion_text = ""
-        async for chunk in response:
-            complete = False
-            if hasattr(chunk, "event"):  # only ChatCompletions have .event
-                if chunk.event.event_type == ChatCompletionResponseEventType.progress:
-                    if chunk.event.delta.type == "text":
-                        completion_text += chunk.event.delta.text
-                if chunk.event.event_type == ChatCompletionResponseEventType.complete:
-                    complete = True
-                    completion_tokens = await self._count_tokens(
-                        [
-                            CompletionMessage(
-                                content=completion_text,
-                                stop_reason=StopReason.end_of_turn,
-                            )
-                        ],
-                        tool_prompt_format=tool_prompt_format,
-                    )
-            else:
-                if hasattr(chunk, "delta"):
-                    completion_text += chunk.delta
-                if hasattr(chunk, "stop_reason") and chunk.stop_reason and self.telemetry_enabled:
-                    complete = True
-                    completion_tokens = await self._count_tokens(completion_text)
-            # if we are done receiving tokens
-            if complete:
-                total_tokens = (prompt_tokens or 0) + (completion_tokens or 0)
-                # Create a separate span for streaming completion metrics
-                if self.telemetry_enabled:
-                    # Log metrics in the new span context
-                    completion_metrics = self._construct_metrics(
-                        prompt_tokens=prompt_tokens,
-                        completion_tokens=completion_tokens,
-                        total_tokens=total_tokens,
-                        model=model,
-                    )
-                    for metric in completion_metrics:
-                        if metric.metric in [
-                            "completion_tokens",
-                            "total_tokens",
-                        ]:  # Only log completion and total tokens
-                            enqueue_event(metric)
-                    # Return metrics in response
-                    async_metrics = [
-                        MetricInResponse(metric=metric.metric, value=metric.value) for metric in completion_metrics
-                    ]
-                    chunk.metrics = async_metrics if chunk.metrics is None else chunk.metrics + async_metrics
-                else:
-                    # Fallback if no telemetry
-                    completion_metrics = self._construct_metrics(
-                        prompt_tokens or 0,
-                        completion_tokens or 0,
-                        total_tokens,
-                        model,
-                    )
-                    async_metrics = [
-                        MetricInResponse(metric=metric.metric, value=metric.value) for metric in completion_metrics
-                    ]
-                    chunk.metrics = async_metrics if chunk.metrics is None else chunk.metrics + async_metrics
-            yield chunk
-
-    async def count_tokens_and_compute_metrics(
-        self,
-        response: ChatCompletionResponse | CompletionResponse,
-        prompt_tokens,
-        model,
-        tool_prompt_format: ToolPromptFormat | None = None,
-    ):
-        if isinstance(response, ChatCompletionResponse):
-            content = [response.completion_message]
-        else:
-            content = response.content
-        completion_tokens = await self._count_tokens(messages=content, tool_prompt_format=tool_prompt_format)
-        total_tokens = (prompt_tokens or 0) + (completion_tokens or 0)
-
-        # Create a separate span for completion metrics
-        if self.telemetry_enabled:
-            # Log metrics in the new span context
-            completion_metrics = self._construct_metrics(
-                prompt_tokens=prompt_tokens,
-                completion_tokens=completion_tokens,
-                total_tokens=total_tokens,
-                model=model,
-            )
-            for metric in completion_metrics:
-                if metric.metric in ["completion_tokens", "total_tokens"]:  # Only log completion and total tokens
-                    enqueue_event(metric)
-            # Return metrics in response
-            return [MetricInResponse(metric=metric.metric, value=metric.value) for metric in completion_metrics]
-
-        # Fallback if no telemetry
-        metrics = self._construct_metrics(
-            prompt_tokens or 0,
-            completion_tokens or 0,
-            total_tokens,
-            model,
-        )
-        return [MetricInResponse(metric=metric.metric, value=metric.value) for metric in metrics]
    async def stream_tokens_and_compute_metrics_openai_chat(
        self,
        response: AsyncIterator[OpenAIChatCompletionChunk],
-        model: Model,
+        fully_qualified_model_id: str,
+        provider_id: str,
        messages: list[OpenAIMessageParam] | None = None,
    ) -> AsyncIterator[OpenAIChatCompletionChunk]:
        """Stream OpenAI chat completion chunks, compute metrics, and store the final completion."""
@@ -497,6 +358,8 @@ class InferenceRouter(Inference):
                if created is None and chunk.created:
                    created = chunk.created

+                chunk.model = fully_qualified_model_id
+
                # Accumulate choice data for final assembly
                if chunk.choices:
                    for choice_delta in chunk.choices:
@@ -553,7 +416,8 @@ class InferenceRouter(Inference):
                            prompt_tokens=chunk.usage.prompt_tokens,
                            completion_tokens=chunk.usage.completion_tokens,
                            total_tokens=chunk.usage.total_tokens,
-                            model=model,
+                            fully_qualified_model_id=fully_qualified_model_id,
+                            provider_id=provider_id,
                        )
                        for metric in metrics:
                            enqueue_event(metric)
@@ -601,7 +465,7 @@ class InferenceRouter(Inference):
                id=id,
                choices=assembled_choices,
                created=created or int(time.time()),
-                model=model.identifier,
+                model=fully_qualified_model_id,
                object="chat.completion",
            )
            logger.debug(f"InferenceRouter.completion_response: {final_response}")

View file

@@ -13,6 +13,8 @@ from llama_stack.core.datatypes import (
    ModelWithOwner,
    RegistryEntrySource,
)
+from llama_stack.core.request_headers import PROVIDER_DATA_VAR, NeedsRequestProviderData
+from llama_stack.core.utils.dynamic import instantiate_class_type
from llama_stack.log import get_logger

from .common import CommonRoutingTableImpl, lookup_model
@@ -42,19 +44,104 @@ class ModelsRoutingTable(CommonRoutingTableImpl, Models):
            await self.update_registered_models(provider_id, models)

+    async def _get_dynamic_models_from_provider_data(self) -> list[Model]:
+        """
+        Fetch models from providers that have credentials in the current request's provider_data.
+
+        This allows users to see models available to them from providers that require
+        per-request API keys (via X-LlamaStack-Provider-Data header).
+
+        Returns models with fully qualified identifiers (provider_id/model_id) but does NOT
+        cache them in the registry since they are user-specific.
+        """
+        provider_data = PROVIDER_DATA_VAR.get()
+        if not provider_data:
+            return []
+
+        dynamic_models = []
+        for provider_id, provider in self.impls_by_provider_id.items():
+            # Check if this provider supports provider_data
+            if not isinstance(provider, NeedsRequestProviderData):
+                continue
+
+            # Check if provider has a validator (some providers like ollama don't need per-request credentials)
+            spec = getattr(provider, "__provider_spec__", None)
+            if not spec or not getattr(spec, "provider_data_validator", None):
+                continue
+
+            # Validate provider_data silently - we're speculatively checking all providers
+            # so validation failures are expected when user didn't provide keys for this provider
+            try:
+                validator = instantiate_class_type(spec.provider_data_validator)
+                validator(**provider_data)
+            except Exception:
+                # User didn't provide credentials for this provider - skip silently
+                continue
+
+            # Validation succeeded! User has credentials for this provider
+            # Now try to list models
+            try:
+                models = await provider.list_models()
+                if not models:
+                    continue
+
+                # Ensure models have fully qualified identifiers with provider_id prefix
+                for model in models:
+                    # Only add prefix if model identifier doesn't already have it
+                    if not model.identifier.startswith(f"{provider_id}/"):
+                        model.identifier = f"{provider_id}/{model.provider_resource_id}"
+                    dynamic_models.append(model)
+
+                logger.debug(f"Fetched {len(models)} models from provider {provider_id} using provider_data")
+            except Exception as e:
+                logger.debug(f"Failed to list models from provider {provider_id} with provider_data: {e}")
+                continue
+
+        return dynamic_models
    async def list_models(self) -> ListModelsResponse:
-        return ListModelsResponse(data=await self.get_all_with_type("model"))
+        # Get models from registry
+        registry_models = await self.get_all_with_type("model")
+
+        # Get additional models available via provider_data (user-specific, not cached)
+        dynamic_models = await self._get_dynamic_models_from_provider_data()
+
+        # Combine, avoiding duplicates (registry takes precedence)
+        registry_identifiers = {m.identifier for m in registry_models}
+        unique_dynamic_models = [m for m in dynamic_models if m.identifier not in registry_identifiers]
+
+        return ListModelsResponse(data=registry_models + unique_dynamic_models)
    async def openai_list_models(self) -> OpenAIListModelsResponse:
-        models = await self.get_all_with_type("model")
+        # Get models from registry
+        registry_models = await self.get_all_with_type("model")
+
+        # Get additional models available via provider_data (user-specific, not cached)
+        dynamic_models = await self._get_dynamic_models_from_provider_data()
+
+        # Combine, avoiding duplicates (registry takes precedence)
+        registry_identifiers = {m.identifier for m in registry_models}
+        unique_dynamic_models = [m for m in dynamic_models if m.identifier not in registry_identifiers]
+        all_models = registry_models + unique_dynamic_models
+
        openai_models = [
            OpenAIModel(
                id=model.identifier,
                object="model",
                created=int(time.time()),
                owned_by="llama_stack",
+                custom_metadata={
+                    "model_type": model.model_type,
+                    "provider_id": model.provider_id,
+                    "provider_resource_id": model.provider_resource_id,
+                    **model.metadata,
+                },
            )
-            for model in models
+            for model in all_models
        ]
        return OpenAIListModelsResponse(data=openai_models)
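
Because the dynamic models are resolved per request and never cached, a client only sees them when it sends credentials in the `X-LlamaStack-Provider-Data` header named in the docstring above. A minimal sketch of such a request; the endpoint path, port, and the `together_api_key` field are illustrative placeholders, not confirmed API:

```python
# Sketch only: the header name comes from the docstring above; the JSON field,
# URL, and port are placeholders.
import json

import httpx

provider_data = {"together_api_key": "sk-..."}  # hypothetical validator field
resp = httpx.get(
    "http://localhost:8321/v1/models",
    headers={"X-LlamaStack-Provider-Data": json.dumps(provider_data)},
)
for model in resp.json()["data"]:
    print(model["id"])  # registry models plus provider-prefixed dynamic models
```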

View file

@ -6,6 +6,7 @@
import ssl import ssl
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from typing import Any
from urllib.parse import parse_qs, urljoin, urlparse from urllib.parse import parse_qs, urljoin, urlparse
import httpx import httpx
@@ -143,14 +144,21 @@ class OAuth2TokenAuthProvider(AuthProvider):
            if self.config.jwks and self.config.jwks.token:
                headers["Authorization"] = f"Bearer {self.config.jwks.token}"

-            self._jwks_client = jwt.PyJWKClient(
-                self.config.jwks.uri if self.config.jwks else None,
-                cache_keys=True,
-                max_cached_keys=10,
-                lifespan=self.config.jwks.key_recheck_period if self.config.jwks else None,
-                headers=headers,
-                ssl_context=ssl_context,
-            )
+            # Ensure uri is not None for PyJWKClient
+            if not self.config.jwks or not self.config.jwks.uri:
+                raise ValueError("JWKS configuration requires a valid URI")
+
+            # Build kwargs conditionally to avoid passing None values
+            jwks_kwargs: dict[str, Any] = {
+                "cache_keys": True,
+                "max_cached_keys": 10,
+                "headers": headers,
+                "ssl_context": ssl_context,
+            }
+            if self.config.jwks.key_recheck_period is not None:
+                jwks_kwargs["lifespan"] = self.config.jwks.key_recheck_period
+
+            self._jwks_client = jwt.PyJWKClient(self.config.jwks.uri, **jwks_kwargs)
        return self._jwks_client
    async def validate_jwt_token(self, token: str, scope: dict | None = None) -> User:
@@ -197,23 +205,31 @@ class OAuth2TokenAuthProvider(AuthProvider):
        if self.config.introspection is None:
            raise ValueError("Introspection is not configured")

+        # ssl_ctxt can be None, bool, str, or SSLContext - httpx accepts all
+        ssl_ctxt: ssl.SSLContext | bool = False  # Default to no verification if no cafile
+        if self.config.tls_cafile:
+            ssl_ctxt = ssl.create_default_context(cafile=self.config.tls_cafile.as_posix())
+
+        # Build post kwargs conditionally based on auth method
+        post_kwargs: dict[str, Any] = {
+            "url": self.config.introspection.url,
+            "data": form,
+            "timeout": 10.0,
+        }
        if self.config.introspection.send_secret_in_body:
            form["client_id"] = self.config.introspection.client_id
            form["client_secret"] = self.config.introspection.client_secret
-            auth = None
        else:
-            auth = (self.config.introspection.client_id, self.config.introspection.client_secret)
-        ssl_ctxt = None
-        if self.config.tls_cafile:
-            ssl_ctxt = ssl.create_default_context(cafile=self.config.tls_cafile.as_posix())
+            # httpx auth parameter expects tuple[str | bytes, str | bytes]
+            post_kwargs["auth"] = (
+                self.config.introspection.client_id,
+                self.config.introspection.client_secret,
+            )

        try:
            async with httpx.AsyncClient(verify=ssl_ctxt) as client:
-                response = await client.post(
-                    self.config.introspection.url,
-                    data=form,
-                    auth=auth,
-                    timeout=10.0,  # Add a reasonable timeout
-                )
+                response = await client.post(**post_kwargs)

                if response.status_code != httpx.codes.OK:
                    logger.warning(f"Token introspection failed with status code: {response.status_code}")
                    raise ValueError(f"Token introspection failed: {response.status_code}")

View file

@@ -68,8 +68,9 @@ def get_all_api_routes(
            else:
                http_method = hdrs.METH_POST
            routes.append(
-                (Route(path=path, methods=[http_method], name=name, endpoint=None), webmethod)
-            )  # setting endpoint to None since don't use a Router object
+                # setting endpoint to None since don't use a Router object
+                (Route(path=path, methods=[http_method], name=name, endpoint=None), webmethod)  # type: ignore[arg-type]
+            )

    apis[api] = routes
@@ -98,7 +99,7 @@ def initialize_route_impls(impls, external_apis: dict[Api, ExternalApiSpec] | No
        impl = impls[api]
        func = getattr(impl, route.name)

        # Get the first (and typically only) method from the set, filtering out HEAD
-        available_methods = [m for m in route.methods if m != "HEAD"]
+        available_methods = [m for m in (route.methods or []) if m != "HEAD"]
        if not available_methods:
            continue  # Skip if only HEAD method is available
        method = available_methods[0].lower()

View file

@@ -14,6 +14,7 @@ from typing import Any

import yaml

from llama_stack.apis.agents import Agents
+from llama_stack.apis.batches import Batches
from llama_stack.apis.benchmarks import Benchmarks
from llama_stack.apis.conversations import Conversations
from llama_stack.apis.datasetio import DatasetIO
@@ -30,7 +31,6 @@ from llama_stack.apis.safety import Safety
from llama_stack.apis.scoring import Scoring
from llama_stack.apis.scoring_functions import ScoringFunctions
from llama_stack.apis.shields import Shields
-from llama_stack.apis.synthetic_data_generation import SyntheticDataGeneration
from llama_stack.apis.tools import RAGToolRuntime, ToolGroups, ToolRuntime
from llama_stack.apis.vector_io import VectorIO
from llama_stack.core.conversations.conversations import ConversationServiceConfig, ConversationServiceImpl
@@ -63,8 +63,8 @@ class LlamaStack(
    Providers,
    Inference,
    Agents,
+    Batches,
    Safety,
-    SyntheticDataGeneration,
    Datasets,
    PostTraining,
    VectorIO,

View file

@@ -6,12 +6,14 @@

import os
import threading
+from collections.abc import Mapping, Sequence
from datetime import datetime
from enum import Enum
from typing import (
    Annotated,
    Any,
    Literal,
+    cast,
)

from opentelemetry import metrics, trace
@@ -30,6 +32,10 @@ from llama_stack.schema_utils import json_schema_type, register_schema

ROOT_SPAN_MARKERS = ["__root__", "__root_span__"]

+# Type alias for OpenTelemetry attribute values (excludes None)
+AttributeValue = str | bool | int | float | Sequence[str] | Sequence[bool] | Sequence[int] | Sequence[float]
+Attributes = Mapping[str, AttributeValue]
+

@json_schema_type
class SpanStatus(Enum):
@@ -428,6 +434,13 @@ _TRACER_PROVIDER = None
logger = get_logger(name=__name__, category="telemetry")

+def _clean_attributes(attrs: dict[str, Any] | None) -> Attributes | None:
+    """Remove None values from attributes dict to match OpenTelemetry's expected type."""
+    if attrs is None:
+        return None
+    return {k: v for k, v in attrs.items() if v is not None}
+
+
def is_tracing_enabled(tracer):
    with tracer.start_as_current_span("check_tracing") as span:
        return span.is_recording()
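
OpenTelemetry attribute values may not be `None`, so `_clean_attributes` drops such keys before they reach a counter or span. A self-contained sketch of the helper's behavior:

```python
# Mirrors the helper defined above, with its type aliases inlined.
from collections.abc import Mapping, Sequence
from typing import Any

AttributeValue = str | bool | int | float | Sequence[str] | Sequence[bool] | Sequence[int] | Sequence[float]
Attributes = Mapping[str, AttributeValue]

def _clean_attributes(attrs: dict[str, Any] | None) -> Attributes | None:
    """Remove None values from attributes dict to match OpenTelemetry's expected type."""
    if attrs is None:
        return None
    return {k: v for k, v in attrs.items() if v is not None}

assert _clean_attributes({"model_id": "m", "provider_id": None}) == {"model_id": "m"}
assert _clean_attributes(None) is None
```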
@@ -456,7 +469,7 @@ class Telemetry:
            # https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter
            span_exporter = OTLPSpanExporter()
            span_processor = BatchSpanProcessor(span_exporter)
-            trace.get_tracer_provider().add_span_processor(span_processor)
+            cast(TracerProvider, trace.get_tracer_provider()).add_span_processor(span_processor)

            metric_reader = PeriodicExportingMetricReader(OTLPMetricExporter())
            metric_provider = MeterProvider(metric_readers=[metric_reader])
@@ -474,7 +487,7 @@ class Telemetry:
    async def shutdown(self) -> None:
        if self.is_otel_endpoint_set:
-            trace.get_tracer_provider().force_flush()
+            cast(TracerProvider, trace.get_tracer_provider()).force_flush()

    async def log_event(self, event: Event, ttl_seconds: int = 604800) -> None:
        if isinstance(event, UnstructuredLogEvent):
@@ -515,7 +528,7 @@ class Telemetry:
                unit=unit,
                description=f"Counter for {name}",
            )
-        return _GLOBAL_STORAGE["counters"][name]
+        return cast(metrics.Counter, _GLOBAL_STORAGE["counters"][name])

    def _get_or_create_gauge(self, name: str, unit: str) -> metrics.ObservableGauge:
        assert self.meter is not None
@@ -525,7 +538,7 @@ class Telemetry:
                unit=unit,
                description=f"Gauge for {name}",
            )
-        return _GLOBAL_STORAGE["gauges"][name]
+        return cast(metrics.ObservableGauge, _GLOBAL_STORAGE["gauges"][name])

    def _log_metric(self, event: MetricEvent) -> None:
        # Add metric as an event to the current span
@@ -560,10 +573,10 @@ class Telemetry:
            return
        if isinstance(event.value, int):
            counter = self._get_or_create_counter(event.metric, event.unit)
-            counter.add(event.value, attributes=event.attributes)
+            counter.add(event.value, attributes=_clean_attributes(event.attributes))
        elif isinstance(event.value, float):
            up_down_counter = self._get_or_create_up_down_counter(event.metric, event.unit)
-            up_down_counter.add(event.value, attributes=event.attributes)
+            up_down_counter.add(event.value, attributes=_clean_attributes(event.attributes))

    def _get_or_create_up_down_counter(self, name: str, unit: str) -> metrics.UpDownCounter:
        assert self.meter is not None
@@ -573,7 +586,7 @@ class Telemetry:
                unit=unit,
                description=f"UpDownCounter for {name}",
            )
-        return _GLOBAL_STORAGE["up_down_counters"][name]
+        return cast(metrics.UpDownCounter, _GLOBAL_STORAGE["up_down_counters"][name])

    def _log_structured(self, event: StructuredLogEvent, ttl_seconds: int) -> None:
        with self._lock:
@@ -601,6 +614,7 @@ class Telemetry:
                if event.payload.parent_span_id:
                    parent_span_id = int(event.payload.parent_span_id, 16)
                    parent_span = _GLOBAL_STORAGE["active_spans"].get(parent_span_id)
+                    if parent_span:
                        context = trace.set_span_in_context(parent_span)
                elif traceparent:
                    carrier = {
@@ -612,15 +626,17 @@ class Telemetry:
                span = tracer.start_span(
                    name=event.payload.name,
                    context=context,
-                    attributes=event.attributes or {},
+                    attributes=_clean_attributes(event.attributes),
                )
                _GLOBAL_STORAGE["active_spans"][span_id] = span
            elif isinstance(event.payload, SpanEndPayload):
-                span = _GLOBAL_STORAGE["active_spans"].get(span_id)
+                span = _GLOBAL_STORAGE["active_spans"].get(span_id)  # type: ignore[assignment]
                if span:
                    if event.attributes:
-                        span.set_attributes(event.attributes)
+                        cleaned_attrs = _clean_attributes(event.attributes)
+                        if cleaned_attrs:
+                            span.set_attributes(cleaned_attrs)
                    status = (
                        trace.Status(status_code=trace.StatusCode.OK)

View file

@@ -12,7 +12,7 @@ from llama_stack.core.ui.modules.api import llama_stack_api

def models():
    # Models Section
    st.header("Models")

-    models_info = {m.identifier: m.to_dict() for m in llama_stack_api.client.models.list()}
+    models_info = {m.id: m.model_dump() for m in llama_stack_api.client.models.list()}

    selected_model = st.selectbox("Select a model", list(models_info.keys()))
    st.json(models_info[selected_model])

View file

@@ -12,7 +12,11 @@ from llama_stack.core.ui.modules.api import llama_stack_api

with st.sidebar:
    st.header("Configuration")
    available_models = llama_stack_api.client.models.list()
-    available_models = [model.identifier for model in available_models if model.model_type == "llm"]
+    available_models = [
+        model.id
+        for model in available_models
+        if model.custom_metadata and model.custom_metadata.get("model_type") == "llm"
+    ]
    selected_model = st.selectbox(
        "Choose a model",
        available_models,

View file

@@ -152,6 +152,37 @@ docker run \
  --port $LLAMA_STACK_PORT
```
### Via Docker with Custom Run Configuration
You can also run the Docker container with a custom run configuration file by mounting it into the container:
```bash
# Set the path to your custom run.yaml file
CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
docker run -it \
--pull always \
--network host \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v $HOME/.llama:/root/.llama \
-v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
-e RUN_CONFIG_PATH=/app/custom-run.yaml \
-e INFERENCE_MODEL=$INFERENCE_MODEL \
-e DEH_URL=$DEH_URL \
-e CHROMA_URL=$CHROMA_URL \
llamastack/distribution-{{ name }} \
--port $LLAMA_STACK_PORT
```
**Note**: The run configuration must be mounted into the container before it can be used. The `-v` flag mounts your local file into the container, and the `RUN_CONFIG_PATH` environment variable tells the entrypoint script which configuration to use.
{% if run_configs %}
Available run configurations for this distribution:
{% for config in run_configs %}
- `{{ config }}`
{% endfor %}
{% endif %}
### Via Conda

Make sure you have done `pip install llama-stack` and have the Llama Stack CLI available.

View file

@@ -109,6 +109,9 @@ storage:
    conversations:
      table_name: openai_conversations
      backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
registered_resources:
  models:
  - metadata: {}

View file

@@ -68,6 +68,36 @@ docker run \
  --port $LLAMA_STACK_PORT
```
### Via Docker with Custom Run Configuration
You can also run the Docker container with a custom run configuration file by mounting it into the container:
```bash
# Set the path to your custom run.yaml file
CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
LLAMA_STACK_PORT=8321
docker run \
-it \
--pull always \
--gpus all \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ~/.llama:/root/.llama \
-v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
-e RUN_CONFIG_PATH=/app/custom-run.yaml \
llamastack/distribution-{{ name }} \
--port $LLAMA_STACK_PORT
```
**Note**: The run configuration must be mounted into the container before it can be used. The `-v` flag mounts your local file into the container, and the `RUN_CONFIG_PATH` environment variable tells the entrypoint script which configuration to use.
{% if run_configs %}
Available run configurations for this distribution:
{% for config in run_configs %}
- `{{ config }}`
{% endfor %}
{% endif %}
### Via venv

Make sure you have the Llama Stack CLI available.

View file

@@ -122,6 +122,9 @@ storage:
    conversations:
      table_name: openai_conversations
      backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
registered_resources:
  models:
  - metadata: {}

View file

@@ -117,13 +117,42 @@ docker run \
  -it \
  --pull always \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run.yaml:/root/my-run.yaml \
+  -v ~/.llama:/root/.llama \
  -e NVIDIA_API_KEY=$NVIDIA_API_KEY \
  llamastack/distribution-{{ name }} \
-  --config /root/my-run.yaml \
  --port $LLAMA_STACK_PORT
```
### Via Docker with Custom Run Configuration
You can also run the Docker container with a custom run configuration file by mounting it into the container:
```bash
# Set the path to your custom run.yaml file
CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
LLAMA_STACK_PORT=8321
docker run \
-it \
--pull always \
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-v ~/.llama:/root/.llama \
-v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
-e RUN_CONFIG_PATH=/app/custom-run.yaml \
-e NVIDIA_API_KEY=$NVIDIA_API_KEY \
llamastack/distribution-{{ name }} \
--port $LLAMA_STACK_PORT
```
**Note**: The run configuration must be mounted into the container before it can be used. The `-v` flag mounts your local file into the container, and the `RUN_CONFIG_PATH` environment variable tells the entrypoint script which configuration to use.
{% if run_configs %}
Available run configurations for this distribution:
{% for config in run_configs %}
- `{{ config }}`
{% endfor %}
{% endif %}
### Via venv

If you've set up your local development environment, you can also install the distribution dependencies using your local virtual environment.

View file

@@ -111,6 +111,9 @@ storage:
    conversations:
      table_name: openai_conversations
      backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
registered_resources:
  models:
  - metadata: {}

Some files were not shown because too many files have changed in this diff.