Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-03 09:53:45 +00:00)

Merge remote-tracking branch 'upstream/main' into elasticsearch-integration

Commit 2407115ee8: 1050 changed files with 65153 additions and 2821 deletions
.github/actions/install-llama-stack-client/action.yml (new file, +64)

@@ -0,0 +1,64 @@
name: Install llama-stack-client
description: Install llama-stack-client based on branch context and client-version input

inputs:
  client-version:
    description: 'Client version to install on non-release branches (latest or published). Ignored on release branches.'
    required: false
    default: ""

outputs:
  uv-index-url:
    description: 'UV_INDEX_URL to use (set for release branches)'
    value: ${{ steps.configure.outputs.uv-index-url }}
  uv-extra-index-url:
    description: 'UV_EXTRA_INDEX_URL to use (set for release branches)'
    value: ${{ steps.configure.outputs.uv-extra-index-url }}
  install-after-sync:
    description: 'Whether to install client after uv sync'
    value: ${{ steps.configure.outputs.install-after-sync }}
  install-source:
    description: 'Where to install client from after sync'
    value: ${{ steps.configure.outputs.install-source }}

runs:
  using: "composite"
  steps:
    - name: Configure client installation
      id: configure
      shell: bash
      run: |
        # Determine the branch we're working with
        BRANCH="${{ github.base_ref || github.ref }}"
        BRANCH="${BRANCH#refs/heads/}"

        echo "Working with branch: $BRANCH"

        # On release branches: use test.pypi for uv sync, then install from git
        # On non-release branches: install based on client-version after sync
        if [[ "$BRANCH" =~ ^release-[0-9]+\.[0-9]+\.x$ ]]; then
          echo "Detected release branch: $BRANCH"

          # Check if matching branch exists in client repo
          if ! git ls-remote --exit-code --heads https://github.com/llamastack/llama-stack-client-python.git "$BRANCH" > /dev/null 2>&1; then
            echo "::error::Branch $BRANCH not found in llama-stack-client-python repository"
            echo "::error::Please create the matching release branch in llama-stack-client-python before testing"
            exit 1
          fi

          # Configure to use test.pypi for sync (to resolve RC versions)
          echo "uv-index-url=https://test.pypi.org/simple/" >> $GITHUB_OUTPUT
          echo "uv-extra-index-url=https://pypi.org/simple/" >> $GITHUB_OUTPUT
          echo "install-after-sync=true" >> $GITHUB_OUTPUT
          echo "install-source=git+https://github.com/llamastack/llama-stack-client-python.git@$BRANCH" >> $GITHUB_OUTPUT
        elif [ "${{ inputs.client-version }}" = "latest" ]; then
          # Install from main git after sync
          echo "install-after-sync=true" >> $GITHUB_OUTPUT
          echo "install-source=git+https://github.com/llamastack/llama-stack-client-python.git@main" >> $GITHUB_OUTPUT
        elif [ "${{ inputs.client-version }}" = "published" ]; then
          # Use published version from PyPI (installed by sync)
          echo "install-after-sync=false" >> $GITHUB_OUTPUT
        elif [ -n "${{ inputs.client-version }}" ]; then
          echo "::error::Invalid client-version: ${{ inputs.client-version }}"
          exit 1
        fi
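The action's whole contract is visible in its outputs: export the two index URLs for `uv sync`, then optionally reinstall the client from git. A minimal consumer sketch (the `client-version` value is illustrative; the real wiring is the setup-runner change below):

```yaml
steps:
  - name: Configure client installation
    id: client-config
    uses: ./.github/actions/install-llama-stack-client
    with:
      client-version: 'latest'   # illustrative; empty on release branches

  - name: Install dependencies
    shell: bash
    env:
      # Empty on non-release branches; test.pypi/pypi pair on release branches
      UV_INDEX_URL: ${{ steps.client-config.outputs.uv-index-url }}
      UV_EXTRA_INDEX_URL: ${{ steps.client-config.outputs.uv-extra-index-url }}
    run: |
      uv sync --all-groups
      # Override the synced client when a git install was requested
      if [ "${{ steps.client-config.outputs.install-after-sync }}" = "true" ]; then
        uv pip install ${{ steps.client-config.outputs.install-source }}
      fi
```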
@@ -94,7 +94,7 @@ runs:
         if: ${{ always() }}
         uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
         with:
-          name: logs-${{ github.run_id }}-${{ github.run_attempt || '' }}-${{ strategy.job-index }}
+          name: logs-${{ github.run_id }}-${{ github.run_attempt || '1' }}-${{ strategy.job-index || github.job }}-${{ github.action }}
           path: |
             *.log
           retention-days: 1
.github/actions/setup-runner/action.yml (23 changes)

@@ -18,8 +18,17 @@ runs:
       python-version: ${{ inputs.python-version }}
       version: 0.7.6

+  - name: Configure client installation
+    id: client-config
+    uses: ./.github/actions/install-llama-stack-client
+    with:
+      client-version: ${{ inputs.client-version }}
+
   - name: Install dependencies
     shell: bash
+    env:
+      UV_INDEX_URL: ${{ steps.client-config.outputs.uv-index-url }}
+      UV_EXTRA_INDEX_URL: ${{ steps.client-config.outputs.uv-extra-index-url }}
     run: |
       echo "Updating project dependencies via uv sync"
       uv sync --all-groups

@@ -27,16 +36,10 @@ runs:
       echo "Installing ad-hoc dependencies"
       uv pip install faiss-cpu

-      # Install llama-stack-client-python based on the client-version input
-      if [ "${{ inputs.client-version }}" = "latest" ]; then
-        echo "Installing latest llama-stack-client-python from main branch"
-        uv pip install git+https://github.com/llamastack/llama-stack-client-python.git@main
-      elif [ "${{ inputs.client-version }}" = "published" ]; then
-        echo "Installing published llama-stack-client-python from PyPI"
-        uv pip install llama-stack-client
-      else
-        echo "Invalid client-version: ${{ inputs.client-version }}"
-        exit 1
+      # Install specific client version after sync if needed
+      if [ "${{ steps.client-config.outputs.install-after-sync }}" = "true" ]; then
+        echo "Installing llama-stack-client from: ${{ steps.client-config.outputs.install-source }}"
+        uv pip install ${{ steps.client-config.outputs.install-source }}
       fi

       echo "Installed llama packages"
@@ -42,18 +42,7 @@ runs:
   - name: Build Llama Stack
     shell: bash
     run: |
-      # Install llama-stack-client-python based on the client-version input
-      if [ "${{ inputs.client-version }}" = "latest" ]; then
-        echo "Installing latest llama-stack-client-python from main branch"
-        export LLAMA_STACK_CLIENT_DIR=git+https://github.com/llamastack/llama-stack-client-python.git@main
-      elif [ "${{ inputs.client-version }}" = "published" ]; then
-        echo "Installing published llama-stack-client-python from PyPI"
-        unset LLAMA_STACK_CLIENT_DIR
-      else
-        echo "Invalid client-version: ${{ inputs.client-version }}"
-        exit 1
-      fi
-
+      # Client is already installed by setup-runner (handles both main and release branches)
       echo "Building Llama Stack"

       LLAMA_STACK_DIR=. \
.github/workflows/README.md (1 change)

@@ -4,6 +4,7 @@ Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a table...
 | Name | File | Purpose |
 | ---- | ---- | ------- |
+| Backward Compatibility Check | [backward-compat.yml](backward-compat.yml) | Check backward compatibility for run.yaml configs |
 | Update Changelog | [changelog.yml](changelog.yml) | Creates PR for updating the CHANGELOG.md |
 | API Conformance Tests | [conformance.yml](conformance.yml) | Run the API Conformance test suite on the changes. |
 | Installer CI | [install-script-ci.yml](install-script-ci.yml) | Test the installation script |
.github/workflows/backward-compat.yml (new file, +578)

@@ -0,0 +1,578 @@
name: Backward Compatibility Check

run-name: Check backward compatibility for run.yaml configs

on:
  pull_request:
    branches:
      - main
      - 'release-[0-9]+.[0-9]+.[0-9]+.[0-9]+'
      - 'release-[0-9]+.[0-9]+.[0-9]+'
      - 'release-[0-9]+.[0-9]+'
    paths:
      - 'src/llama_stack/core/datatypes.py'
      - 'src/llama_stack/providers/datatypes.py'
      - 'src/llama_stack/distributions/**/run.yaml'
      - 'tests/backward_compat/**'
      - '.github/workflows/backward-compat.yml'

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  check-main-compatibility:
    name: Check Compatibility with main
    runs-on: ubuntu-latest

    steps:
      - name: Checkout PR branch
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          fetch-depth: 0 # Need full history to access main branch

      - name: Set up Python
        uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
        with:
          python-version: '3.12'

      - name: Install uv
        uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1
        with:
          enable-cache: true

      - name: Install dependencies
        run: |
          uv sync --group dev

      - name: Extract run.yaml files from main branch
        id: extract_configs
        run: |
          # Get list of run.yaml paths from main
          git fetch origin main
          CONFIG_PATHS=$(git ls-tree -r --name-only origin/main | grep "src/llama_stack/distributions/.*/run.yaml$" || true)

          if [ -z "$CONFIG_PATHS" ]; then
            echo "No run.yaml files found in main branch"
            exit 1
          fi

          # Extract all configs to a temp directory
          mkdir -p /tmp/main_configs
          echo "Extracting configs from main branch:"

          while IFS= read -r config_path; do
            if [ -z "$config_path" ]; then
              continue
            fi

            # Extract filename for storage
            filename=$(basename $(dirname "$config_path"))
            echo " - $filename (from $config_path)"

            git show origin/main:"$config_path" > "/tmp/main_configs/${filename}.yaml"
          done <<< "$CONFIG_PATHS"

          echo ""
          echo "Extracted $(ls /tmp/main_configs/*.yaml | wc -l) config files"

      - name: Test all configs from main
        id: test_configs
        continue-on-error: true
        run: |
          # Run pytest once with all configs parameterized
          if COMPAT_TEST_CONFIGS_DIR=/tmp/main_configs uv run pytest tests/backward_compat/test_run_config.py -v; then
            echo "failed=false" >> $GITHUB_OUTPUT
          else
            echo "failed=true" >> $GITHUB_OUTPUT
            exit 1
          fi

      - name: Check for breaking change acknowledgment
        id: check_ack
        if: steps.test_configs.outputs.failed == 'true'
        run: |
          echo "Breaking changes detected. Checking for acknowledgment..."

          # Check PR title for '!:' marker (conventional commits)
          PR_TITLE="${{ github.event.pull_request.title }}"
          if [[ "$PR_TITLE" =~ ^[a-z]+\!: ]]; then
            echo "✓ Breaking change acknowledged in PR title"
            echo "acknowledged=true" >> $GITHUB_OUTPUT
            exit 0
          fi

          # Check commit messages for BREAKING CHANGE:
          if git log origin/main..HEAD --format=%B | grep -q "BREAKING CHANGE:"; then
            echo "✓ Breaking change acknowledged in commit message"
            echo "acknowledged=true" >> $GITHUB_OUTPUT
            exit 0
          fi

          echo "✗ Breaking change NOT acknowledged"
          echo "acknowledged=false" >> $GITHUB_OUTPUT
        env:
          GH_TOKEN: ${{ github.token }}

      - name: Evaluate results
        if: always()
        run: |
          FAILED="${{ steps.test_configs.outputs.failed }}"
          ACKNOWLEDGED="${{ steps.check_ack.outputs.acknowledged }}"

          if [[ "$FAILED" == "true" ]]; then
            if [[ "$ACKNOWLEDGED" == "true" ]]; then
              echo ""
              echo "⚠️ WARNING: Breaking changes detected but acknowledged"
              echo ""
              echo "This PR introduces backward-incompatible changes to run.yaml."
              echo "The changes have been properly acknowledged."
              echo ""
              exit 0 # Pass the check
            else
              echo ""
              echo "❌ ERROR: Breaking changes detected without acknowledgment"
              echo ""
              echo "This PR introduces backward-incompatible changes to run.yaml"
              echo "that will break existing user configurations."
              echo ""
              echo "To acknowledge this breaking change, do ONE of:"
              echo "  1. Add '!:' to your PR title (e.g., 'feat!: change xyz')"
              echo "  2. Add the 'breaking-change' label to this PR"
              echo "  3. Include 'BREAKING CHANGE:' in a commit message"
              echo ""
              exit 1 # Fail the check
            fi
          fi

  test-integration-main:
    name: Run Integration Tests with main Config
    runs-on: ubuntu-latest

    steps:
      - name: Checkout PR branch
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          fetch-depth: 0

      - name: Extract ci-tests run.yaml from main
        run: |
          git fetch origin main
          git show origin/main:src/llama_stack/distributions/ci-tests/run.yaml > /tmp/main-ci-tests-run.yaml
          echo "Extracted ci-tests run.yaml from main branch"

      - name: Setup test environment
        uses: ./.github/actions/setup-test-environment
        with:
          python-version: '3.12'
          client-version: 'latest'
          setup: 'ollama'
          suite: 'base'
          inference-mode: 'replay'

      - name: Run integration tests with main config
        id: test_integration
        continue-on-error: true
        uses: ./.github/actions/run-and-record-tests
        with:
          stack-config: /tmp/main-ci-tests-run.yaml
          setup: 'ollama'
          inference-mode: 'replay'
          suite: 'base'

      - name: Check for breaking change acknowledgment
        id: check_ack
        if: steps.test_integration.outcome == 'failure'
        run: |
          echo "Integration tests failed. Checking for acknowledgment..."

          # Check PR title for '!:' marker (conventional commits)
          PR_TITLE="${{ github.event.pull_request.title }}"
          if [[ "$PR_TITLE" =~ ^[a-z]+\!: ]]; then
            echo "✓ Breaking change acknowledged in PR title"
            echo "acknowledged=true" >> $GITHUB_OUTPUT
            exit 0
          fi

          # Check commit messages for BREAKING CHANGE:
          if git log origin/main..HEAD --format=%B | grep -q "BREAKING CHANGE:"; then
            echo "✓ Breaking change acknowledged in commit message"
            echo "acknowledged=true" >> $GITHUB_OUTPUT
            exit 0
          fi

          echo "✗ Breaking change NOT acknowledged"
          echo "acknowledged=false" >> $GITHUB_OUTPUT
        env:
          GH_TOKEN: ${{ github.token }}

      - name: Evaluate integration test results
        if: always()
        run: |
          TEST_FAILED="${{ steps.test_integration.outcome == 'failure' }}"
          ACKNOWLEDGED="${{ steps.check_ack.outputs.acknowledged }}"

          if [[ "$TEST_FAILED" == "true" ]]; then
            if [[ "$ACKNOWLEDGED" == "true" ]]; then
              echo ""
              echo "⚠️ WARNING: Integration tests failed with main config but acknowledged"
              echo ""
              exit 0 # Pass the check
            else
              echo ""
              echo "❌ ERROR: Integration tests failed with main config without acknowledgment"
              echo ""
              echo "To acknowledge this breaking change, do ONE of:"
              echo "  1. Add '!:' to your PR title (e.g., 'feat!: change xyz')"
              echo "  2. Include 'BREAKING CHANGE:' in a commit message"
              echo ""
              exit 1 # Fail the check
            fi
          fi

  test-integration-release:
    name: Run Integration Tests with Latest Release (Informational)
    runs-on: ubuntu-latest

    steps:
      - name: Checkout PR branch
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          fetch-depth: 0

      - name: Get latest release
        id: get_release
        run: |
          # Get the latest release from GitHub
          LATEST_TAG=$(gh release list --limit 1 --json tagName --jq '.[0].tagName' 2>/dev/null || echo "")

          if [ -z "$LATEST_TAG" ]; then
            echo "No releases found, skipping release compatibility check"
            echo "has_release=false" >> $GITHUB_OUTPUT
            exit 0
          fi

          echo "Latest release: $LATEST_TAG"
          echo "has_release=true" >> $GITHUB_OUTPUT
          echo "tag=$LATEST_TAG" >> $GITHUB_OUTPUT
        env:
          GH_TOKEN: ${{ github.token }}

      - name: Extract ci-tests run.yaml from release
        if: steps.get_release.outputs.has_release == 'true'
        id: extract_config
        run: |
          RELEASE_TAG="${{ steps.get_release.outputs.tag }}"

          # Try with src/ prefix first (newer releases), then without (older releases)
          if git show "$RELEASE_TAG:src/llama_stack/distributions/ci-tests/run.yaml" > /tmp/release-ci-tests-run.yaml 2>/dev/null; then
            echo "Extracted ci-tests run.yaml from release $RELEASE_TAG (src/ path)"
            echo "has_config=true" >> $GITHUB_OUTPUT
          elif git show "$RELEASE_TAG:llama_stack/distributions/ci-tests/run.yaml" > /tmp/release-ci-tests-run.yaml 2>/dev/null; then
            echo "Extracted ci-tests run.yaml from release $RELEASE_TAG (old path)"
            echo "has_config=true" >> $GITHUB_OUTPUT
          else
            echo "::warning::ci-tests/run.yaml not found in release $RELEASE_TAG"
            echo "has_config=false" >> $GITHUB_OUTPUT
          fi

      - name: Setup test environment
        if: steps.get_release.outputs.has_release == 'true' && steps.extract_config.outputs.has_config == 'true'
        uses: ./.github/actions/setup-test-environment
        with:
          python-version: '3.12'
          client-version: 'latest'
          setup: 'ollama'
          suite: 'base'
          inference-mode: 'replay'

      - name: Run integration tests with release config (PR branch)
        id: test_release_pr
        if: steps.get_release.outputs.has_release == 'true' && steps.extract_config.outputs.has_config == 'true'
        continue-on-error: true
        uses: ./.github/actions/run-and-record-tests
        with:
          stack-config: /tmp/release-ci-tests-run.yaml
          setup: 'ollama'
          inference-mode: 'replay'
          suite: 'base'

      - name: Checkout main branch to test baseline
        if: steps.get_release.outputs.has_release == 'true' && steps.extract_config.outputs.has_config == 'true'
        run: |
          git checkout origin/main

      - name: Setup test environment for main
        if: steps.get_release.outputs.has_release == 'true' && steps.extract_config.outputs.has_config == 'true'
        uses: ./.github/actions/setup-test-environment
        with:
          python-version: '3.12'
          client-version: 'latest'
          setup: 'ollama'
          suite: 'base'
          inference-mode: 'replay'

      - name: Run integration tests with release config (main branch)
        id: test_release_main
        if: steps.get_release.outputs.has_release == 'true' && steps.extract_config.outputs.has_config == 'true'
        continue-on-error: true
        uses: ./.github/actions/run-and-record-tests
        with:
          stack-config: /tmp/release-ci-tests-run.yaml
          setup: 'ollama'
          inference-mode: 'replay'
          suite: 'base'

      - name: Report results and post PR comment
        if: always() && steps.get_release.outputs.has_release == 'true' && steps.extract_config.outputs.has_config == 'true'
        run: |
          RELEASE_TAG="${{ steps.get_release.outputs.tag }}"
          PR_OUTCOME="${{ steps.test_release_pr.outcome }}"
          MAIN_OUTCOME="${{ steps.test_release_main.outcome }}"

          if [[ "$PR_OUTCOME" == "failure" && "$MAIN_OUTCOME" == "success" ]]; then
            # NEW breaking change - PR fails but main passes
            echo "::error::🚨 This PR introduces a NEW breaking change!"

            # Check if we already posted a comment (to avoid spam on every push)
            EXISTING_COMMENT=$(gh pr view ${{ github.event.pull_request.number }} --json comments --jq '.comments[] | select(.body | contains("🚨 New Breaking Change Detected") and contains("Integration tests")) | .id' | head -1)

            if [[ -z "$EXISTING_COMMENT" ]]; then
              gh pr comment ${{ github.event.pull_request.number }} --body "## 🚨 New Breaking Change Detected

          **Integration tests against release \`$RELEASE_TAG\` are now failing**

          ⚠️ This PR introduces a breaking change that affects compatibility with the latest release.

          - Users on release \`$RELEASE_TAG\` may not be able to upgrade
          - Existing configurations may break

          The tests pass on \`main\` but fail with this PR's changes.

          > **Note:** This is informational only and does not block merge.
          > Consider whether this breaking change is acceptable for users."
            else
              echo "Comment already exists, skipping to avoid spam"
            fi

            cat >> $GITHUB_STEP_SUMMARY <<EOF
          ## 🚨 NEW Breaking Change Detected

          **Integration tests against release \`$RELEASE_TAG\` FAILED**

          ⚠️ **This PR introduces a NEW breaking change**

          - Tests **PASS** on main branch ✅
          - Tests **FAIL** on PR branch ❌
          - Users on release \`$RELEASE_TAG\` may not be able to upgrade
          - Existing configurations may break

          > **Note:** This is informational only and does not block merge.
          > Consider whether this breaking change is acceptable for users.
          EOF

          elif [[ "$PR_OUTCOME" == "failure" ]]; then
            # Existing breaking change - both PR and main fail
            echo "::warning::Breaking change already exists in main branch"

            cat >> $GITHUB_STEP_SUMMARY <<EOF
          ## ⚠️ Release Compatibility Test Failed (Existing Issue)

          **Integration tests against release \`$RELEASE_TAG\` FAILED**

          - Tests **FAIL** on main branch ❌
          - Tests **FAIL** on PR branch ❌
          - This breaking change already exists in main (not introduced by this PR)

          > **Note:** This is informational only.
          EOF

          else
            # Success - tests pass
            cat >> $GITHUB_STEP_SUMMARY <<EOF
          ## ✅ Release Compatibility Test Passed

          Integration tests against release \`$RELEASE_TAG\` passed successfully.
          This PR maintains compatibility with the latest release.
          EOF
          fi
        env:
          GH_TOKEN: ${{ github.token }}

  check-schema-release-compatibility:
    name: Check Schema Compatibility with Latest Release (Informational)
    runs-on: ubuntu-latest

    steps:
      - name: Checkout PR branch
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
        with:
          python-version: '3.12'

      - name: Install uv
        uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1
        with:
          enable-cache: true

      - name: Install dependencies
        run: |
          uv sync --group dev

      - name: Get latest release
        id: get_release
        run: |
          # Get the latest release from GitHub
          LATEST_TAG=$(gh release list --limit 1 --json tagName --jq '.[0].tagName' 2>/dev/null || echo "")

          if [ -z "$LATEST_TAG" ]; then
            echo "No releases found, skipping release compatibility check"
            echo "has_release=false" >> $GITHUB_OUTPUT
            exit 0
          fi

          echo "Latest release: $LATEST_TAG"
          echo "has_release=true" >> $GITHUB_OUTPUT
          echo "tag=$LATEST_TAG" >> $GITHUB_OUTPUT
        env:
          GH_TOKEN: ${{ github.token }}

      - name: Extract configs from release
        if: steps.get_release.outputs.has_release == 'true'
        id: extract_release_configs
        run: |
          RELEASE_TAG="${{ steps.get_release.outputs.tag }}"

          # Get run.yaml files from the release (try both src/ and old path)
          CONFIG_PATHS=$(git ls-tree -r --name-only "$RELEASE_TAG" | grep "llama_stack/distributions/.*/run.yaml$" || true)

          if [ -z "$CONFIG_PATHS" ]; then
            echo "::warning::No run.yaml files found in release $RELEASE_TAG"
            echo "has_configs=false" >> $GITHUB_OUTPUT
            exit 0
          fi

          # Extract all configs to a temp directory
          mkdir -p /tmp/release_configs
          echo "Extracting configs from release $RELEASE_TAG:"

          while IFS= read -r config_path; do
            if [ -z "$config_path" ]; then
              continue
            fi

            filename=$(basename $(dirname "$config_path"))
            echo " - $filename (from $config_path)"

            git show "$RELEASE_TAG:$config_path" > "/tmp/release_configs/${filename}.yaml" 2>/dev/null || true
          done <<< "$CONFIG_PATHS"

          echo ""
          echo "Extracted $(ls /tmp/release_configs/*.yaml 2>/dev/null | wc -l) config files"
          echo "has_configs=true" >> $GITHUB_OUTPUT

      - name: Test against release configs (PR branch)
        id: test_schema_pr
        if: steps.get_release.outputs.has_release == 'true' && steps.extract_release_configs.outputs.has_configs == 'true'
        continue-on-error: true
        run: |
          RELEASE_TAG="${{ steps.get_release.outputs.tag }}"
          COMPAT_TEST_CONFIGS_DIR=/tmp/release_configs uv run pytest tests/backward_compat/test_run_config.py -v --tb=short

      - name: Checkout main branch to test baseline
        if: steps.get_release.outputs.has_release == 'true' && steps.extract_release_configs.outputs.has_configs == 'true'
        run: |
          git checkout origin/main

      - name: Install dependencies for main
        if: steps.get_release.outputs.has_release == 'true' && steps.extract_release_configs.outputs.has_configs == 'true'
        run: |
          uv sync --group dev

      - name: Test against release configs (main branch)
        id: test_schema_main
        if: steps.get_release.outputs.has_release == 'true' && steps.extract_release_configs.outputs.has_configs == 'true'
        continue-on-error: true
        run: |
          RELEASE_TAG="${{ steps.get_release.outputs.tag }}"
          COMPAT_TEST_CONFIGS_DIR=/tmp/release_configs uv run pytest tests/backward_compat/test_run_config.py -v --tb=short

      - name: Report results and post PR comment
        if: always() && steps.get_release.outputs.has_release == 'true' && steps.extract_release_configs.outputs.has_configs == 'true'
        run: |
          RELEASE_TAG="${{ steps.get_release.outputs.tag }}"
          PR_OUTCOME="${{ steps.test_schema_pr.outcome }}"
          MAIN_OUTCOME="${{ steps.test_schema_main.outcome }}"

          if [[ "$PR_OUTCOME" == "failure" && "$MAIN_OUTCOME" == "success" ]]; then
            # NEW breaking change - PR fails but main passes
            echo "::error::🚨 This PR introduces a NEW schema breaking change!"

            # Check if we already posted a comment (to avoid spam on every push)
            EXISTING_COMMENT=$(gh pr view ${{ github.event.pull_request.number }} --json comments --jq '.comments[] | select(.body | contains("🚨 New Schema Breaking Change Detected")) | .id' | head -1)

            if [[ -z "$EXISTING_COMMENT" ]]; then
              gh pr comment ${{ github.event.pull_request.number }} --body "## 🚨 New Schema Breaking Change Detected

          **Schema validation against release \`$RELEASE_TAG\` is now failing**

          ⚠️ This PR introduces a schema breaking change that affects compatibility with the latest release.

          - Users on release \`$RELEASE_TAG\` will not be able to upgrade
          - Existing run.yaml configurations will fail validation

          The tests pass on \`main\` but fail with this PR's changes.

          > **Note:** This is informational only and does not block merge.
          > Consider whether this breaking change is acceptable for users."
            else
              echo "Comment already exists, skipping to avoid spam"
            fi

            cat >> $GITHUB_STEP_SUMMARY <<EOF
          ## 🚨 NEW Schema Breaking Change Detected

          **Schema validation against release \`$RELEASE_TAG\` FAILED**

          ⚠️ **This PR introduces a NEW schema breaking change**

          - Tests **PASS** on main branch ✅
          - Tests **FAIL** on PR branch ❌
          - Users on release \`$RELEASE_TAG\` will not be able to upgrade
          - Existing run.yaml configurations will fail validation

          > **Note:** This is informational only and does not block merge.
          > Consider whether this breaking change is acceptable for users.
          EOF

          elif [[ "$PR_OUTCOME" == "failure" ]]; then
            # Existing breaking change - both PR and main fail
            echo "::warning::Schema breaking change already exists in main branch"

            cat >> $GITHUB_STEP_SUMMARY <<EOF
          ## ⚠️ Release Schema Compatibility Failed (Existing Issue)

          **Schema validation against release \`$RELEASE_TAG\` FAILED**

          - Tests **FAIL** on main branch ❌
          - Tests **FAIL** on PR branch ❌
          - This schema breaking change already exists in main (not introduced by this PR)

          > **Note:** This is informational only.
          EOF

          else
            # Success - tests pass
            cat >> $GITHUB_STEP_SUMMARY <<EOF
          ## ✅ Release Schema Compatibility Passed

          All run.yaml configs from release \`$RELEASE_TAG\` are compatible.
          This PR maintains backward compatibility with the latest release.
          EOF
          fi
        env:
          GH_TOKEN: ${{ github.token }}
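The schema job can be reproduced locally with the same commands it runs; a sketch as a detached step (the `starter` distribution is just an example config, not the only one the job tests):

```yaml
- name: Reproduce the schema compat check locally   # sketch, not part of the workflow
  shell: bash
  run: |
    git fetch origin main
    mkdir -p /tmp/main_configs
    git show origin/main:src/llama_stack/distributions/starter/run.yaml > /tmp/main_configs/starter.yaml
    COMPAT_TEST_CONFIGS_DIR=/tmp/main_configs uv run pytest tests/backward_compat/test_run_config.py -v
```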
.github/workflows/integration-auth-tests.yml (15 changes)

@@ -4,13 +4,17 @@ run-name: Run the integration test suite with Kubernetes authentication

 on:
   push:
-    branches: [ main ]
+    branches:
+      - main
+      - 'release-[0-9]+.[0-9]+.x'
   pull_request:
-    branches: [ main ]
+    branches:
+      - main
+      - 'release-[0-9]+.[0-9]+.x'
     paths:
-      - 'distributions/**'
-      - 'llama_stack/**'
-      - '!llama_stack/ui/**'
+      - 'src/llama_stack/**'
+      - '!src/llama_stack/ui/**'
       - 'tests/integration/**'
       - 'uv.lock'
       - 'pyproject.toml'

@@ -91,6 +95,9 @@ jobs:
       conversations:
         table_name: openai_conversations
         backend: sql_default
+      prompts:
+        namespace: prompts
+        backend: kv_default
       server:
         port: 8321
       EOF
@@ -4,11 +4,15 @@ run-name: Run the integration test suite with SqlStore

 on:
   push:
-    branches: [ main ]
+    branches:
+      - main
+      - 'release-[0-9]+.[0-9]+.x'
   pull_request:
-    branches: [ main ]
+    branches:
+      - main
+      - 'release-[0-9]+.[0-9]+.x'
     paths:
-      - 'llama_stack/providers/utils/sqlstore/**'
+      - 'src/llama_stack/providers/utils/sqlstore/**'
       - 'tests/integration/sqlstore/**'
       - 'uv.lock'
       - 'pyproject.toml'

@@ -64,7 +68,7 @@ jobs:
       - name: Upload test logs
         if: ${{ always() }}
-        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
         with:
           name: postgres-test-logs-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.python-version }}
           path: |
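The version bump above follows the repo's pinning convention: actions are referenced by full commit SHA, with the human-readable tag left in a trailing comment (the `check-workflows-use-hashes` pre-commit hook further down enforces this). The idiom in isolation:

```yaml
# SHA-pinned action reference; the tag comment is documentation only
uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
```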
.github/workflows/integration-tests.yml (14 changes)

@@ -4,13 +4,17 @@ run-name: Run the integration test suites from tests/integration in replay mode

 on:
   push:
-    branches: [ main ]
+    branches:
+      - main
+      - 'release-[0-9]+.[0-9]+.x'
   pull_request:
-    branches: [ main ]
+    branches:
+      - main
+      - 'release-[0-9]+.[0-9]+.x'
     types: [opened, synchronize, reopened]
     paths:
-      - 'llama_stack/**'
-      - '!llama_stack/ui/**'
+      - 'src/llama_stack/**'
+      - '!src/llama_stack/ui/**'
       - 'tests/**'
       - 'uv.lock'
       - 'pyproject.toml'

@@ -47,7 +51,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        client-type: [library, docker]
+        client-type: [library, docker, server]
         # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
         python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
         client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
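The `&& ... ||` chains in this matrix are GitHub's expression-language substitute for a ternary: the `&&` operand is chosen when the condition is truthy, the `||` operand otherwise. That is safe here because `fromJSON` of a non-empty array is always truthy. In isolation:

```yaml
strategy:
  matrix:
    # nightly schedule -> test both interpreters; any other trigger -> 3.12 only
    python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
```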
@@ -4,12 +4,16 @@ run-name: Run the integration test suite with various VectorIO providers

 on:
   push:
-    branches: [ main ]
+    branches:
+      - main
+      - 'release-[0-9]+.[0-9]+.x'
   pull_request:
-    branches: [ main ]
+    branches:
+      - main
+      - 'release-[0-9]+.[0-9]+.x'
     paths:
-      - 'llama_stack/**'
-      - '!llama_stack/ui/**'
+      - 'src/llama_stack/**'
+      - '!src/llama_stack/ui/**'
       - 'tests/integration/vector_io/**'
       - 'uv.lock'
       - 'pyproject.toml'

@@ -209,7 +213,7 @@ jobs:
       - name: Upload all logs to artifacts
         if: ${{ always() }}
-        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
         with:
           name: vector-io-logs-${{ github.run_id }}-${{ github.run_attempt }}-${{ env.SANITIZED_PROVIDER }}-${{ matrix.python-version }}
           path: |
.github/workflows/pre-commit.yml (63 changes)

@@ -5,7 +5,9 @@ run-name: Run pre-commit checks
 on:
   pull_request:
   push:
-    branches: [main]
+    branches:
+      - main
+      - 'release-[0-9]+.[0-9]+.x'

 concurrency:
   group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}

@@ -41,25 +43,43 @@ jobs:
         with:
           node-version: '20'
           cache: 'npm'
-          cache-dependency-path: 'llama_stack/ui/'
+          cache-dependency-path: 'src/llama_stack/ui/'

+      - name: Set up uv
+        uses: astral-sh/setup-uv@2ddd2b9cb38ad8efd50337e8ab201519a34c9f24 # v7.1.1
+
       - name: Install npm dependencies
         run: npm ci
-        working-directory: llama_stack/ui
+        working-directory: src/llama_stack/ui

       - name: Install pre-commit
         run: python -m pip install pre-commit

       - name: Cache pre-commit
         uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4
         with:
           path: ~/.cache/pre-commit
           key: pre-commit-3|${{ env.pythonLocation }}|${{ hashFiles('.pre-commit-config.yaml') }}

       - name: Run pre-commit
         id: precommit
-        uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
+        continue-on-error: true
+        run: |
+          set +e
+          pre-commit run --show-diff-on-failure --color=always --all-files 2>&1 | tee /tmp/precommit.log
+          status=${PIPESTATUS[0]}
+          echo "status=$status" >> $GITHUB_OUTPUT
+          exit 0
         env:
-          SKIP: no-commit-to-branch
+          SKIP: no-commit-to-branch,mypy
           RUFF_OUTPUT_FORMAT: github

       - name: Check pre-commit results
-        if: steps.precommit.outcome == 'failure'
+        if: steps.precommit.outputs.status != '0'
         run: |
-          echo "::error::Pre-commit hooks failed. Please run 'pre-commit run --all-files' locally and commit the fixes."
+          echo "::warning::Some pre-commit hooks failed. Check the output above for details."
+          echo ""
+          echo "Failed hooks output:"
+          cat /tmp/precommit.log
           exit 1

       - name: Debug

@@ -109,3 +129,30 @@ jobs:
           echo "$unstaged_files"
           exit 1
         fi
+
+      - name: Configure client installation
+        id: client-config
+        uses: ./.github/actions/install-llama-stack-client
+
+      - name: Sync dev + type_checking dependencies
+        env:
+          UV_INDEX_URL: ${{ steps.client-config.outputs.uv-index-url }}
+          UV_EXTRA_INDEX_URL: ${{ steps.client-config.outputs.uv-extra-index-url }}
+        run: |
+          uv sync --group dev --group type_checking
+
+          # Install specific client version after sync if needed
+          if [ "${{ steps.client-config.outputs.install-after-sync }}" = "true" ]; then
+            echo "Installing llama-stack-client from: ${{ steps.client-config.outputs.install-source }}"
+            uv pip install ${{ steps.client-config.outputs.install-source }}
+          fi
+
+      - name: Run mypy (full type_checking)
+        run: |
+          set +e
+          uv run --group dev --group type_checking mypy
+          status=$?
+          if [ $status -ne 0 ]; then
+            echo "::error::Full mypy failed. Reproduce locally with 'uv run pre-commit run mypy-full --hook-stage manual --all-files'."
+          fi
+          exit $status
.github/workflows/precommit-trigger.yml (4 changes)

@@ -145,12 +145,12 @@ jobs:
         with:
           node-version: '20'
           cache: 'npm'
-          cache-dependency-path: 'llama_stack/ui/'
+          cache-dependency-path: 'src/llama_stack/ui/'

       - name: Install npm dependencies
         if: steps.check_author.outputs.authorized == 'true'
         run: npm ci
-        working-directory: llama_stack/ui
+        working-directory: src/llama_stack/ui

       - name: Run pre-commit
         if: steps.check_author.outputs.authorized == 'true'
.github/workflows/providers-build.yml (32 changes)

@@ -7,24 +7,24 @@ on:
     branches:
       - main
     paths:
-      - 'llama_stack/cli/stack/build.py'
-      - 'llama_stack/cli/stack/_build.py'
-      - 'llama_stack/core/build.*'
-      - 'llama_stack/core/*.sh'
+      - 'src/llama_stack/cli/stack/build.py'
+      - 'src/llama_stack/cli/stack/_build.py'
+      - 'src/llama_stack/core/build.*'
+      - 'src/llama_stack/core/*.sh'
       - '.github/workflows/providers-build.yml'
-      - 'llama_stack/distributions/**'
+      - 'src/llama_stack/distributions/**'
       - 'pyproject.toml'
       - 'containers/Containerfile'
       - '.dockerignore'

   pull_request:
     paths:
-      - 'llama_stack/cli/stack/build.py'
-      - 'llama_stack/cli/stack/_build.py'
-      - 'llama_stack/core/build.*'
-      - 'llama_stack/core/*.sh'
+      - 'src/llama_stack/cli/stack/build.py'
+      - 'src/llama_stack/cli/stack/_build.py'
+      - 'src/llama_stack/core/build.*'
+      - 'src/llama_stack/core/*.sh'
       - '.github/workflows/providers-build.yml'
-      - 'llama_stack/distributions/**'
+      - 'src/llama_stack/distributions/**'
       - 'pyproject.toml'
       - 'containers/Containerfile'
       - '.dockerignore'

@@ -45,7 +45,7 @@ jobs:
       - name: Generate Distribution List
         id: set-matrix
         run: |
-          distros=$(ls llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
+          distros=$(ls src/llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
           echo "distros=$distros" >> "$GITHUB_OUTPUT"

   build:

@@ -107,13 +107,13 @@ jobs:
       - name: Build container image
         run: |
-          BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "python:3.12-slim"' llama_stack/distributions/ci-tests/build.yaml)
+          BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "python:3.12-slim"' src/llama_stack/distributions/ci-tests/build.yaml)
           docker build . \
             -f containers/Containerfile \
             --build-arg INSTALL_MODE=editable \
             --build-arg DISTRO_NAME=ci-tests \
             --build-arg BASE_IMAGE="$BASE_IMAGE" \
-            --build-arg RUN_CONFIG_PATH=/workspace/llama_stack/distributions/ci-tests/run.yaml \
+            --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/run.yaml \
             -t llama-stack:ci-tests

       - name: Inspect the container image entrypoint

@@ -143,17 +143,17 @@ jobs:
         run: |
           yq -i '
             .distribution_spec.container_image = "registry.access.redhat.com/ubi9:latest"
-          ' llama_stack/distributions/ci-tests/build.yaml
+          ' src/llama_stack/distributions/ci-tests/build.yaml

       - name: Build UBI9 container image
         run: |
-          BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "registry.access.redhat.com/ubi9:latest"' llama_stack/distributions/ci-tests/build.yaml)
+          BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "registry.access.redhat.com/ubi9:latest"' src/llama_stack/distributions/ci-tests/build.yaml)
           docker build . \
             -f containers/Containerfile \
             --build-arg INSTALL_MODE=editable \
             --build-arg DISTRO_NAME=ci-tests \
             --build-arg BASE_IMAGE="$BASE_IMAGE" \
-            --build-arg RUN_CONFIG_PATH=/workspace/llama_stack/distributions/ci-tests/run.yaml \
+            --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/run.yaml \
             -t llama-stack:ci-tests-ubi9

       - name: Inspect UBI9 image
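For the `distros` output above to drive a matrix, the generating job must re-export the step output at the job level; roughly (job ids here are assumptions, not necessarily the ones in this workflow):

```yaml
generate-matrix:                # hypothetical job id
  runs-on: ubuntu-latest
  outputs:
    distros: ${{ steps.set-matrix.outputs.distros }}   # re-export the step output
  steps:
    - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
    - id: set-matrix
      run: |
        distros=$(ls src/llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
        echo "distros=$distros" >> "$GITHUB_OUTPUT"

build:
  needs: generate-matrix
  strategy:
    matrix:
      distro: ${{ fromJson(needs.generate-matrix.outputs.distros) }}
  steps:
    - run: echo "Building ${{ matrix.distro }}"
```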
.github/workflows/providers-list-deps.yml (24 changes)

@@ -7,22 +7,22 @@ on:
     branches:
       - main
     paths:
-      - 'llama_stack/cli/stack/list_deps.py'
-      - 'llama_stack/cli/stack/_list_deps.py'
-      - 'llama_stack/core/build.*'
-      - 'llama_stack/core/*.sh'
+      - 'src/llama_stack/cli/stack/list_deps.py'
+      - 'src/llama_stack/cli/stack/_list_deps.py'
+      - 'src/llama_stack/core/build.*'
+      - 'src/llama_stack/core/*.sh'
       - '.github/workflows/providers-list-deps.yml'
-      - 'llama_stack/templates/**'
+      - 'src/llama_stack/templates/**'
       - 'pyproject.toml'

   pull_request:
     paths:
-      - 'llama_stack/cli/stack/list_deps.py'
-      - 'llama_stack/cli/stack/_list_deps.py'
-      - 'llama_stack/core/build.*'
-      - 'llama_stack/core/*.sh'
+      - 'src/llama_stack/cli/stack/list_deps.py'
+      - 'src/llama_stack/cli/stack/_list_deps.py'
+      - 'src/llama_stack/core/build.*'
+      - 'src/llama_stack/core/*.sh'
       - '.github/workflows/providers-list-deps.yml'
-      - 'llama_stack/templates/**'
+      - 'src/llama_stack/templates/**'
       - 'pyproject.toml'

 concurrency:

@@ -41,7 +41,7 @@ jobs:
       - name: Generate Distribution List
         id: set-matrix
         run: |
-          distros=$(ls llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
+          distros=$(ls src/llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
           echo "distros=$distros" >> "$GITHUB_OUTPUT"

   list-deps:

@@ -102,4 +102,4 @@ jobs:
       USE_COPY_NOT_MOUNT: "true"
       LLAMA_STACK_DIR: "."
     run: |
-      uv run llama stack list-deps llama_stack/distributions/ci-tests/build.yaml
+      uv run llama stack list-deps src/llama_stack/distributions/ci-tests/build.yaml
.github/workflows/python-build-test.yml (4 changes)

@@ -10,7 +10,7 @@ on:
     branches:
       - main
     paths-ignore:
-      - 'llama_stack/ui/**'
+      - 'src/llama_stack/ui/**'

 jobs:
   build:

@@ -24,7 +24,7 @@ jobs:
       uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0

       - name: Install uv
-        uses: astral-sh/setup-uv@3259c6206f993105e3a61b142c2d97bf4b9ef83d # v7.1.0
+        uses: astral-sh/setup-uv@2ddd2b9cb38ad8efd50337e8ab201519a34c9f24 # v7.1.1
         with:
           python-version: ${{ matrix.python-version }}
           activate-environment: true
@@ -8,7 +8,7 @@ on:
   pull_request:
     branches: [ main ]
     paths:
-      - 'llama_stack/**'
+      - 'src/llama_stack/**'
       - 'tests/integration/**'
       - 'uv.lock'
       - 'pyproject.toml'

@@ -78,7 +78,7 @@ jobs:
       - name: Upload all logs to artifacts
         if: ${{ always() }}
-        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
         with:
           name: logs-${{ github.run_id }}-${{ github.run_attempt }}-external-provider-module-test
           path: |
.github/workflows/test-external.yml (6 changes)

@@ -8,8 +8,8 @@ on:
   pull_request:
     branches: [ main ]
     paths:
-      - 'llama_stack/**'
-      - '!llama_stack/ui/**'
+      - 'src/llama_stack/**'
+      - '!src/llama_stack/ui/**'
       - 'tests/integration/**'
       - 'uv.lock'
       - 'pyproject.toml'

@@ -84,7 +84,7 @@ jobs:
       - name: Upload all logs to artifacts
         if: ${{ always() }}
-        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
         with:
           name: logs-${{ github.run_id }}-${{ github.run_attempt }}-external-test
           path: |
.github/workflows/ui-unit-tests.yml (12 changes)

@@ -8,7 +8,7 @@ on:
   pull_request:
     branches: [ main ]
     paths:
-      - 'llama_stack/ui/**'
+      - 'src/llama_stack/ui/**'
      - '.github/workflows/ui-unit-tests.yml' # This workflow
   workflow_dispatch:

@@ -33,22 +33,22 @@ jobs:
         with:
           node-version: ${{ matrix.node-version }}
           cache: 'npm'
-          cache-dependency-path: 'llama_stack/ui/package-lock.json'
+          cache-dependency-path: 'src/llama_stack/ui/package-lock.json'

       - name: Install dependencies
-        working-directory: llama_stack/ui
+        working-directory: src/llama_stack/ui
         run: npm ci

       - name: Run linting
-        working-directory: llama_stack/ui
+        working-directory: src/llama_stack/ui
         run: npm run lint

       - name: Run format check
-        working-directory: llama_stack/ui
+        working-directory: src/llama_stack/ui
         run: npm run format:check

       - name: Run unit tests
-        working-directory: llama_stack/ui
+        working-directory: src/llama_stack/ui
         env:
           CI: true
.github/workflows/unit-tests.yml (14 changes)

@@ -4,12 +4,16 @@ run-name: Run the unit test suite

 on:
   push:
-    branches: [ main ]
+    branches:
+      - main
+      - 'release-[0-9]+.[0-9]+.x'
   pull_request:
-    branches: [ main ]
+    branches:
+      - main
+      - 'release-[0-9]+.[0-9]+.x'
     paths:
-      - 'llama_stack/**'
-      - '!llama_stack/ui/**'
+      - 'src/llama_stack/**'
+      - '!src/llama_stack/ui/**'
       - 'tests/unit/**'
       - 'uv.lock'
       - 'pyproject.toml'

@@ -45,7 +49,7 @@ jobs:
       - name: Upload test results
         if: always()
-        uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+        uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
         with:
           name: test-results-${{ matrix.python }}
           path: |
.gitignore (3 changes)

@@ -32,3 +32,6 @@ CLAUDE.md
 docs/.docusaurus/
 docs/node_modules/
 docs/static/imported-files/
+docs/docs/api-deprecated/
+docs/docs/api-experimental/
+docs/docs/api/
@@ -42,7 +42,7 @@ repos:
     hooks:
       - id: ruff
        args: [ --fix ]
-       exclude: ^llama_stack/strong_typing/.*$
+       exclude: ^src/llama_stack/strong_typing/.*$
      - id: ruff-format

  - repo: https://github.com/adamchainz/blacken-docs

@@ -58,18 +58,27 @@ repos:
      - id: uv-lock

  - repo: https://github.com/pre-commit/mirrors-mypy
-   rev: v1.16.1
+   rev: v1.18.2
    hooks:
      - id: mypy
        additional_dependencies:
          - uv==0.6.2
          - mypy
          - pytest
          - rich
          - types-requests
          - pydantic
+         - httpx
        pass_filenames: false

+ - repo: local
+   hooks:
+     - id: mypy-full
+       name: mypy (full type_checking)
+       entry: uv run --group dev --group type_checking mypy
+       language: system
+       pass_filenames: false
+       stages: [manual]
+
  # - repo: https://github.com/tcort/markdown-link-check
  #   rev: v3.11.2
  #   hooks:

@@ -86,7 +95,7 @@ repos:
        language: python
        pass_filenames: false
        require_serial: true
-       files: ^llama_stack/distributions/.*$|^llama_stack/providers/.*/inference/.*/models\.py$
+       files: ^src/llama_stack/distributions/.*$|^src/llama_stack/providers/.*/inference/.*/models\.py$
      - id: provider-codegen
        name: Provider Codegen
        additional_dependencies:

@@ -95,7 +104,7 @@ repos:
        language: python
        pass_filenames: false
        require_serial: true
-       files: ^llama_stack/providers/.*$
+       files: ^src/llama_stack/providers/.*$
      - id: openapi-codegen
        name: API Spec Codegen
        additional_dependencies:

@@ -104,7 +113,7 @@ repos:
        language: python
        pass_filenames: false
        require_serial: true
-       files: ^llama_stack/apis/|^docs/openapi_generator/
+       files: ^src/llama_stack/apis/|^docs/openapi_generator/
      - id: check-workflows-use-hashes
        name: Check GitHub Actions use SHA-pinned actions
        entry: ./scripts/check-workflows-use-hashes.sh

@@ -120,7 +129,7 @@ repos:
        pass_filenames: false
        require_serial: true
        always_run: true
-       files: ^llama_stack/.*$
+       files: ^src/llama_stack/.*$
      - id: forbid-pytest-asyncio
        name: Block @pytest.mark.asyncio and @pytest_asyncio.fixture
        entry: bash

@@ -150,10 +159,9 @@ repos:
        name: Format & Lint UI
        entry: bash ./scripts/run-ui-linter.sh
        language: system
-       files: ^llama_stack/ui/.*\.(ts|tsx)$
+       files: ^src/llama_stack/ui/.*\.(ts|tsx)$
        pass_filenames: false
        require_serial: true
-
      - id: check-log-usage
        name: Ensure 'llama_stack.log' usage for logging
        entry: bash

@@ -172,7 +180,23 @@ repos:
            exit 1
          fi
          exit 0
+
+     - id: fips-compliance
+       name: Ensure llama-stack remains FIPS compliant
+       entry: bash
+       language: system
+       types: [python]
+       pass_filenames: true
+       exclude: '^tests/.*$' # Exclude test dir as some safety tests used MD5
+       args:
+         - -c
+         - |
+           grep -EnH '^[^#]*\b(md5|sha1|uuid3|uuid5)\b' "$@" && {
+             echo;
+             echo "❌ Do not use any of the following functions: hashlib.md5, hashlib.sha1, uuid.uuid3, uuid.uuid5"
+             echo "   These functions are not FIPS-compliant"
+             echo;
+             exit 1;
+           } || true
 ci:
   autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks
   autoupdate_commit_msg: ⬆ [pre-commit.ci] pre-commit autoupdate
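To see what the new `fips-compliance` hook flags, one can run its grep against a throwaway file; a sketch as a detached step (`demo.py` is hypothetical):

```yaml
- name: FIPS hook demo    # sketch; demo.py is a throwaway file
  shell: bash
  run: |
    printf 'import hashlib\ndigest = hashlib.md5(b"data").hexdigest()\n' > demo.py
    # Same pattern the hook uses; it matches line 2, so the hook would fail here
    grep -EnH '^[^#]*\b(md5|sha1|uuid3|uuid5)\b' demo.py || true
    # hashlib.sha256 is the usual FIPS-approved replacement
```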
@@ -61,6 +61,18 @@ uv run pre-commit run --all-files -v

 The `-v` (verbose) parameter is optional but often helpful for getting more information about any issues that the pre-commit checks identify.

+To run the expanded mypy configuration that CI enforces, use:
+
+```bash
+uv run pre-commit run mypy-full --hook-stage manual --all-files
+```
+
+or invoke mypy directly with all optional dependencies:
+
+```bash
+uv run --group dev --group type_checking mypy
+```
+
 ```{caution}
 Before pushing your changes, make sure that the pre-commit hooks have passed successfully.
 ```
MANIFEST.in (18 changes)

@@ -1,11 +1,11 @@
 include pyproject.toml
-include llama_stack/models/llama/llama3/tokenizer.model
-include llama_stack/models/llama/llama4/tokenizer.model
-include llama_stack/core/*.sh
-include llama_stack/cli/scripts/*.sh
-include llama_stack/distributions/*/*.yaml
-exclude llama_stack/distributions/ci-tests
+include src/llama_stack/models/llama/llama3/tokenizer.model
+include src/llama_stack/models/llama/llama4/tokenizer.model
+include src/llama_stack/core/*.sh
+include src/llama_stack/cli/scripts/*.sh
+include src/llama_stack/distributions/*/*.yaml
+exclude src/llama_stack/distributions/ci-tests
 include tests/integration/test_cases/inference/*.json
-include llama_stack/models/llama/*/*.md
-include llama_stack/tests/integration/*.jpg
-prune llama_stack/distributions/ci-tests
+include src/llama_stack/models/llama/*/*.md
+include src/llama_stack/tests/integration/*.jpg
+prune src/llama_stack/distributions/ci-tests
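A quick way to confirm the rewritten MANIFEST.in still captures the moved files is to build an sdist and grep the archive listing; a sketch (assuming `uv build`; the tokenizer path is one of the includes above):

```yaml
- name: Verify sdist contents    # sketch, not part of this commit
  shell: bash
  run: |
    uv build --sdist
    tar -tzf dist/*.tar.gz | grep 'models/llama/llama3/tokenizer.model'
```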
@@ -44,14 +44,6 @@ data:
       db: ${env.POSTGRES_DB:=llamastack}
       user: ${env.POSTGRES_USER:=llamastack}
       password: ${env.POSTGRES_PASSWORD:=llamastack}
-    files:
-    - provider_id: meta-reference-files
-      provider_type: inline::localfs
-      config:
-        storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
-        metadata_store:
-          type: sqlite
-          db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
     safety:
     - provider_id: llama-guard
       provider_type: inline::llama-guard

@@ -115,13 +107,21 @@ data:
       db: ${env.POSTGRES_DB:=llamastack}
       user: ${env.POSTGRES_USER:=llamastack}
       password: ${env.POSTGRES_PASSWORD:=llamastack}
-    references:
+    stores:
       metadata:
         backend: kv_default
         namespace: registry
       inference:
         backend: sql_default
         table_name: inference_store
+        max_write_queue_size: 10000
+        num_writers: 4
+      conversations:
+        backend: sql_default
+        table_name: openai_conversations
+      prompts:
+        backend: kv_default
+        namespace: prompts
     models:
     - metadata:
         embedding_dimension: 768
@@ -36,14 +36,6 @@ providers:
       persistence:
         namespace: vector_io::chroma_remote
         backend: kv_default
-  files:
-  - provider_id: meta-reference-files
-    provider_type: inline::localfs
-    config:
-      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
-      metadata_store:
-        table_name: files_metadata
-        backend: sql_default
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard

@@ -108,6 +100,9 @@ storage:
   conversations:
     table_name: openai_conversations
     backend: sql_default
+  prompts:
+    namespace: prompts
+    backend: kv_default
 registered_resources:
   models:
   - metadata:
|||
|
|
@ -1,610 +0,0 @@
|
|||
# yaml-language-server: $schema=https://app.stainlessapi.com/config-internal.schema.json
|
||||
|
||||
organization:
|
||||
# Name of your organization or company, used to determine the name of the client
|
||||
# and headings.
|
||||
name: llama-stack-client
|
||||
docs: https://llama-stack.readthedocs.io/en/latest/
|
||||
contact: llamastack@meta.com
|
||||
security:
|
||||
- {}
|
||||
- BearerAuth: []
|
||||
security_schemes:
|
||||
BearerAuth:
|
||||
type: http
|
||||
scheme: bearer
|
||||
# `targets` define the output targets and their customization options, such as
|
||||
# whether to emit the Node SDK and what it's package name should be.
|
||||
targets:
|
||||
node:
|
||||
package_name: llama-stack-client
|
||||
production_repo: llamastack/llama-stack-client-typescript
|
||||
publish:
|
||||
npm: false
|
||||
python:
|
||||
package_name: llama_stack_client
|
||||
production_repo: llamastack/llama-stack-client-python
|
||||
options:
|
||||
use_uv: true
|
||||
publish:
|
||||
pypi: true
|
||||
project_name: llama_stack_client
|
||||
kotlin:
|
||||
reverse_domain: com.llama_stack_client.api
|
||||
production_repo: null
|
||||
publish:
|
||||
maven: false
|
||||
go:
|
||||
package_name: llama-stack-client
|
||||
production_repo: llamastack/llama-stack-client-go
|
||||
options:
|
||||
enable_v2: true
|
||||
back_compat_use_shared_package: false
|
||||
|
||||
# `client_settings` define settings for the API client, such as extra constructor
|
||||
# arguments (used for authentication), retry behavior, idempotency, etc.
|
||||
client_settings:
|
||||
default_env_prefix: LLAMA_STACK_CLIENT
|
||||
opts:
|
||||
api_key:
|
||||
type: string
|
||||
read_env: LLAMA_STACK_CLIENT_API_KEY
|
||||
auth: { security_scheme: BearerAuth }
|
||||
nullable: true
|
||||
|
||||
# `environments` are a map of the name of the environment (e.g. "sandbox",
|
||||
# "production") to the corresponding url to use.
|
||||
environments:
|
||||
production: http://any-hosted-llama-stack.com
|
||||
|
||||
# `pagination` defines [pagination schemes] which provide a template to match
|
||||
# endpoints and generate next-page and auto-pagination helpers in the SDKs.
|
||||
pagination:
|
||||
- name: datasets_iterrows
|
||||
type: offset
|
||||
request:
|
||||
dataset_id:
|
||||
type: string
|
||||
start_index:
|
||||
type: integer
|
||||
x-stainless-pagination-property:
|
||||
purpose: offset_count_param
|
||||
limit:
|
||||
type: integer
|
||||
response:
|
||||
data:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
next_index:
|
||||
type: integer
|
||||
x-stainless-pagination-property:
|
||||
purpose: offset_count_start_field
|
||||
- name: openai_cursor_page
|
||||
type: cursor
|
||||
request:
|
||||
limit:
|
||||
type: integer
|
||||
after:
|
||||
type: string
|
||||
x-stainless-pagination-property:
|
||||
purpose: next_cursor_param
|
||||
response:
|
||||
data:
|
||||
type: array
|
||||
items: {}
|
||||
has_more:
|
||||
type: boolean
|
||||
last_id:
|
||||
type: string
|
||||
x-stainless-pagination-property:
|
||||
purpose: next_cursor_field
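To see what this scheme buys downstream (a sketch, not part of this config: it assumes a running stack at the default port and the published `llama-stack-client` package), the generated Python SDK turns any endpoint matched by `openai_cursor_page` into an auto-paginating iterator:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server

# Iterating the returned page re-requests with `after=<last_id>` until the
# response reports `has_more: false`, so this walks every stored completion.
for completion in client.chat.completions.list(limit=20):
    print(completion.id)
```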
|
||||
# `resources` define the structure and organization for your API, such as how
|
||||
# methods and models are grouped together and accessed. See the [configuration
|
||||
# guide] for more information.
|
||||
#
|
||||
# [configuration guide]:
|
||||
# https://app.stainlessapi.com/docs/guides/configure#resources
|
||||
resources:
|
||||
$shared:
|
||||
models:
|
||||
agent_config: AgentConfig
|
||||
interleaved_content_item: InterleavedContentItem
|
||||
interleaved_content: InterleavedContent
|
||||
param_type: ParamType
|
||||
safety_violation: SafetyViolation
|
||||
sampling_params: SamplingParams
|
||||
scoring_result: ScoringResult
|
||||
message: Message
|
||||
user_message: UserMessage
|
||||
completion_message: CompletionMessage
|
||||
tool_response_message: ToolResponseMessage
|
||||
system_message: SystemMessage
|
||||
tool_call: ToolCall
|
||||
query_result: RAGQueryResult
|
||||
document: RAGDocument
|
||||
query_config: RAGQueryConfig
|
||||
response_format: ResponseFormat
|
||||
toolgroups:
|
||||
models:
|
||||
tool_group: ToolGroup
|
||||
list_tool_groups_response: ListToolGroupsResponse
|
||||
methods:
|
||||
register: post /v1/toolgroups
|
||||
get: get /v1/toolgroups/{toolgroup_id}
|
||||
list: get /v1/toolgroups
|
||||
unregister: delete /v1/toolgroups/{toolgroup_id}
|
||||
tools:
|
||||
methods:
|
||||
get: get /v1/tools/{tool_name}
|
||||
list:
|
||||
endpoint: get /v1/tools
|
||||
paginated: false
|
||||
|
||||
tool_runtime:
|
||||
models:
|
||||
tool_def: ToolDef
|
||||
tool_invocation_result: ToolInvocationResult
|
||||
methods:
|
||||
list_tools:
|
||||
endpoint: get /v1/tool-runtime/list-tools
|
||||
paginated: false
|
||||
invoke_tool: post /v1/tool-runtime/invoke
|
||||
subresources:
|
||||
rag_tool:
|
||||
methods:
|
||||
insert: post /v1/tool-runtime/rag-tool/insert
|
||||
query: post /v1/tool-runtime/rag-tool/query
|
||||
|
||||
responses:
|
||||
models:
|
||||
response_object_stream: OpenAIResponseObjectStream
|
||||
response_object: OpenAIResponseObject
|
||||
methods:
|
||||
create:
|
||||
type: http
|
||||
endpoint: post /v1/responses
|
||||
streaming:
|
||||
stream_event_model: responses.response_object_stream
|
||||
param_discriminator: stream
|
||||
retrieve: get /v1/responses/{response_id}
|
||||
list:
|
||||
type: http
|
||||
endpoint: get /v1/responses
|
||||
delete:
|
||||
type: http
|
||||
endpoint: delete /v1/responses/{response_id}
|
||||
subresources:
|
||||
input_items:
|
||||
methods:
|
||||
list:
|
||||
type: http
|
||||
endpoint: get /v1/responses/{response_id}/input_items
|
||||
|
||||
conversations:
|
||||
models:
|
||||
conversation_object: Conversation
|
||||
methods:
|
||||
create:
|
||||
type: http
|
||||
endpoint: post /v1/conversations
|
||||
retrieve: get /v1/conversations/{conversation_id}
|
||||
update:
|
||||
type: http
|
||||
endpoint: post /v1/conversations/{conversation_id}
|
||||
delete:
|
||||
type: http
|
||||
endpoint: delete /v1/conversations/{conversation_id}
|
||||
subresources:
|
||||
items:
|
||||
methods:
|
||||
get:
|
||||
type: http
|
||||
endpoint: get /v1/conversations/{conversation_id}/items/{item_id}
|
||||
list:
|
||||
type: http
|
||||
endpoint: get /v1/conversations/{conversation_id}/items
|
||||
create:
|
||||
type: http
|
||||
endpoint: post /v1/conversations/{conversation_id}/items
|
||||
|
||||
inspect:
|
||||
models:
|
||||
healthInfo: HealthInfo
|
||||
providerInfo: ProviderInfo
|
||||
routeInfo: RouteInfo
|
||||
versionInfo: VersionInfo
|
||||
methods:
|
||||
health: get /v1/health
|
||||
version: get /v1/version
|
||||
|
||||
embeddings:
|
||||
models:
|
||||
create_embeddings_response: OpenAIEmbeddingsResponse
|
||||
methods:
|
||||
create: post /v1/embeddings
|
||||
|
||||
chat:
|
||||
models:
|
||||
chat_completion_chunk: OpenAIChatCompletionChunk
|
||||
subresources:
|
||||
completions:
|
||||
methods:
|
||||
create:
|
||||
type: http
|
||||
endpoint: post /v1/chat/completions
|
||||
streaming:
|
||||
stream_event_model: chat.chat_completion_chunk
|
||||
param_discriminator: stream
|
||||
list:
|
||||
type: http
|
||||
endpoint: get /v1/chat/completions
|
||||
retrieve:
|
||||
type: http
|
||||
endpoint: get /v1/chat/completions/{completion_id}
|
||||
completions:
|
||||
methods:
|
||||
create:
|
||||
type: http
|
||||
endpoint: post /v1/completions
|
||||
streaming:
|
||||
param_discriminator: stream
|
||||
|
||||
vector_io:
|
||||
models:
|
||||
queryChunksResponse: QueryChunksResponse
|
||||
methods:
|
||||
insert: post /v1/vector-io/insert
|
||||
query: post /v1/vector-io/query
|
||||
|
||||
vector_stores:
|
||||
models:
|
||||
vector_store: VectorStoreObject
|
||||
list_vector_stores_response: VectorStoreListResponse
|
||||
vector_store_delete_response: VectorStoreDeleteResponse
|
||||
vector_store_search_response: VectorStoreSearchResponsePage
|
||||
methods:
|
||||
create: post /v1/vector_stores
|
||||
list:
|
||||
endpoint: get /v1/vector_stores
|
||||
retrieve: get /v1/vector_stores/{vector_store_id}
|
||||
update: post /v1/vector_stores/{vector_store_id}
|
||||
delete: delete /v1/vector_stores/{vector_store_id}
|
||||
search: post /v1/vector_stores/{vector_store_id}/search
|
||||
subresources:
|
||||
files:
|
||||
models:
|
||||
vector_store_file: VectorStoreFileObject
|
||||
methods:
|
||||
list: get /v1/vector_stores/{vector_store_id}/files
|
||||
retrieve: get /v1/vector_stores/{vector_store_id}/files/{file_id}
|
||||
update: post /v1/vector_stores/{vector_store_id}/files/{file_id}
|
||||
delete: delete /v1/vector_stores/{vector_store_id}/files/{file_id}
|
||||
create: post /v1/vector_stores/{vector_store_id}/files
|
||||
content: get /v1/vector_stores/{vector_store_id}/files/{file_id}/content
|
||||
file_batches:
|
||||
models:
|
||||
vector_store_file_batches: VectorStoreFileBatchObject
|
||||
list_vector_store_files_in_batch_response: VectorStoreFilesListInBatchResponse
|
||||
methods:
|
||||
create: post /v1/vector_stores/{vector_store_id}/file_batches
|
||||
retrieve: get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}
|
||||
list_files: get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files
|
||||
cancel: post /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel
|
||||
|
||||
models:
|
||||
models:
|
||||
model: Model
|
||||
list_models_response: ListModelsResponse
|
||||
methods:
|
||||
retrieve: get /v1/models/{model_id}
|
||||
list:
|
||||
endpoint: get /v1/models
|
||||
paginated: false
|
||||
register: post /v1/models
|
||||
unregister: delete /v1/models/{model_id}
|
||||
subresources:
|
||||
openai:
|
||||
methods:
|
||||
list:
|
||||
endpoint: get /v1/models
|
||||
paginated: false
|
||||
|
||||
providers:
|
||||
models:
|
||||
list_providers_response: ListProvidersResponse
|
||||
methods:
|
||||
list:
|
||||
endpoint: get /v1/providers
|
||||
paginated: false
|
||||
retrieve: get /v1/providers/{provider_id}
|
||||
|
||||
routes:
|
||||
models:
|
||||
list_routes_response: ListRoutesResponse
|
||||
methods:
|
||||
list:
|
||||
endpoint: get /v1/inspect/routes
|
||||
paginated: false
|
||||
|
||||
|
||||
moderations:
|
||||
models:
|
||||
create_response: ModerationObject
|
||||
methods:
|
||||
create: post /v1/moderations
|
||||
|
||||
|
||||
safety:
|
||||
models:
|
||||
run_shield_response: RunShieldResponse
|
||||
methods:
|
||||
run_shield: post /v1/safety/run-shield
|
||||
|
||||
|
||||
shields:
|
||||
models:
|
||||
shield: Shield
|
||||
list_shields_response: ListShieldsResponse
|
||||
methods:
|
||||
retrieve: get /v1/shields/{identifier}
|
||||
list:
|
||||
endpoint: get /v1/shields
|
||||
paginated: false
|
||||
register: post /v1/shields
|
||||
delete: delete /v1/shields/{identifier}
|
||||
|
||||
synthetic_data_generation:
|
||||
models:
|
||||
syntheticDataGenerationResponse: SyntheticDataGenerationResponse
|
||||
methods:
|
||||
generate: post /v1/synthetic-data-generation/generate
|
||||
|
||||
telemetry:
|
||||
models:
|
||||
span_with_status: SpanWithStatus
|
||||
trace: Trace
|
||||
query_spans_response: QuerySpansResponse
|
||||
event: Event
|
||||
query_condition: QueryCondition
|
||||
methods:
|
||||
query_traces:
|
||||
endpoint: post /v1alpha/telemetry/traces
|
||||
skip_test_reason: 'unsupported query params in java / kotlin'
|
||||
get_span_tree: post /v1alpha/telemetry/spans/{span_id}/tree
|
||||
query_spans:
|
||||
endpoint: post /v1alpha/telemetry/spans
|
||||
skip_test_reason: 'unsupported query params in java / kotlin'
|
||||
query_metrics:
|
||||
endpoint: post /v1alpha/telemetry/metrics/{metric_name}
|
||||
skip_test_reason: 'unsupported query params in java / kotlin'
|
||||
# log_event: post /v1alpha/telemetry/events
|
||||
save_spans_to_dataset: post /v1alpha/telemetry/spans/export
|
||||
get_span: get /v1alpha/telemetry/traces/{trace_id}/spans/{span_id}
|
||||
get_trace: get /v1alpha/telemetry/traces/{trace_id}
|
||||
|
||||
scoring:
|
||||
methods:
|
||||
score: post /v1/scoring/score
|
||||
score_batch: post /v1/scoring/score-batch
|
||||
scoring_functions:
|
||||
methods:
|
||||
retrieve: get /v1/scoring-functions/{scoring_fn_id}
|
||||
list:
|
||||
endpoint: get /v1/scoring-functions
|
||||
paginated: false
|
||||
register: post /v1/scoring-functions
|
||||
models:
|
||||
scoring_fn: ScoringFn
|
||||
scoring_fn_params: ScoringFnParams
|
||||
list_scoring_functions_response: ListScoringFunctionsResponse
|
||||
|
||||
benchmarks:
|
||||
methods:
|
||||
retrieve: get /v1alpha/eval/benchmarks/{benchmark_id}
|
||||
list:
|
||||
endpoint: get /v1alpha/eval/benchmarks
|
||||
paginated: false
|
||||
register: post /v1alpha/eval/benchmarks
|
||||
models:
|
||||
benchmark: Benchmark
|
||||
list_benchmarks_response: ListBenchmarksResponse
|
||||
|
||||
files:
|
||||
methods:
|
||||
create: post /v1/files
|
||||
list: get /v1/files
|
||||
retrieve: get /v1/files/{file_id}
|
||||
delete: delete /v1/files/{file_id}
|
||||
content: get /v1/files/{file_id}/content
|
||||
models:
|
||||
file: OpenAIFileObject
|
||||
list_files_response: ListOpenAIFileResponse
|
||||
delete_file_response: OpenAIFileDeleteResponse
|
||||
|
||||
alpha:
|
||||
subresources:
|
||||
inference:
|
||||
methods:
|
||||
rerank: post /v1alpha/inference/rerank
|
||||
|
||||
post_training:
|
||||
models:
|
||||
algorithm_config: AlgorithmConfig
|
||||
post_training_job: PostTrainingJob
|
||||
list_post_training_jobs_response: ListPostTrainingJobsResponse
|
||||
methods:
|
||||
preference_optimize: post /v1alpha/post-training/preference-optimize
|
||||
supervised_fine_tune: post /v1alpha/post-training/supervised-fine-tune
|
||||
subresources:
|
||||
job:
|
||||
methods:
|
||||
artifacts: get /v1alpha/post-training/job/artifacts
|
||||
cancel: post /v1alpha/post-training/job/cancel
|
||||
status: get /v1alpha/post-training/job/status
|
||||
list:
|
||||
endpoint: get /v1alpha/post-training/jobs
|
||||
paginated: false
|
||||
|
||||
eval:
|
||||
methods:
|
||||
evaluate_rows: post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations
|
||||
run_eval: post /v1alpha/eval/benchmarks/{benchmark_id}/jobs
|
||||
evaluate_rows_alpha: post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations
|
||||
run_eval_alpha: post /v1alpha/eval/benchmarks/{benchmark_id}/jobs
|
||||
|
||||
subresources:
|
||||
jobs:
|
||||
methods:
|
||||
cancel: delete /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}
|
||||
status: get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}
|
||||
retrieve: get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result
|
||||
models:
|
||||
evaluate_response: EvaluateResponse
|
||||
benchmark_config: BenchmarkConfig
|
||||
job: Job
|
||||
|
||||
agents:
|
||||
methods:
|
||||
create: post /v1alpha/agents
|
||||
list: get /v1alpha/agents
|
||||
retrieve: get /v1alpha/agents/{agent_id}
|
||||
delete: delete /v1alpha/agents/{agent_id}
|
||||
models:
|
||||
inference_step: InferenceStep
|
||||
tool_execution_step: ToolExecutionStep
|
||||
tool_response: ToolResponse
|
||||
shield_call_step: ShieldCallStep
|
||||
memory_retrieval_step: MemoryRetrievalStep
|
||||
subresources:
|
||||
session:
|
||||
models:
|
||||
session: Session
|
||||
methods:
|
||||
list: get /v1alpha/agents/{agent_id}/sessions
|
||||
create: post /v1alpha/agents/{agent_id}/session
|
||||
delete: delete /v1alpha/agents/{agent_id}/session/{session_id}
|
||||
retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}
|
||||
steps:
|
||||
methods:
|
||||
retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}
|
||||
turn:
|
||||
models:
|
||||
turn: Turn
|
||||
turn_response_event: AgentTurnResponseEvent
|
||||
agent_turn_response_stream_chunk: AgentTurnResponseStreamChunk
|
||||
methods:
|
||||
create:
|
||||
type: http
|
||||
endpoint: post /v1alpha/agents/{agent_id}/session/{session_id}/turn
|
||||
streaming:
|
||||
stream_event_model: alpha.agents.turn.agent_turn_response_stream_chunk
|
||||
param_discriminator: stream
|
||||
retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}
|
||||
resume:
|
||||
type: http
|
||||
endpoint: post /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume
|
||||
streaming:
|
||||
stream_event_model: alpha.agents.turn.agent_turn_response_stream_chunk
|
||||
param_discriminator: stream
|
||||
|
||||
beta:
|
||||
subresources:
|
||||
datasets:
|
||||
models:
|
||||
list_datasets_response: ListDatasetsResponse
|
||||
methods:
|
||||
register: post /v1beta/datasets
|
||||
retrieve: get /v1beta/datasets/{dataset_id}
|
||||
list:
|
||||
endpoint: get /v1beta/datasets
|
||||
paginated: false
|
||||
unregister: delete /v1beta/datasets/{dataset_id}
|
||||
iterrows: get /v1beta/datasetio/iterrows/{dataset_id}
|
||||
appendrows: post /v1beta/datasetio/append-rows/{dataset_id}
|
||||
|
||||
|
||||
settings:
|
||||
license: MIT
|
||||
unwrap_response_fields: [ data ]
|
||||
|
||||
openapi:
|
||||
transformations:
|
||||
- command: renameValue
|
||||
reason: pydantic reserved name
|
||||
args:
|
||||
filter:
|
||||
only:
|
||||
- '$.components.schemas.InferenceStep.properties.model_response'
|
||||
rename:
|
||||
python:
|
||||
property_name: 'inference_model_response'
|
||||
|
||||
# - command: renameValue
|
||||
# reason: pydantic reserved name
|
||||
# args:
|
||||
# filter:
|
||||
# only:
|
||||
# - '$.components.schemas.Model.properties.model_type'
|
||||
# rename:
|
||||
# python:
|
||||
# property_name: 'type'
|
||||
- command: mergeObject
|
||||
reason: Better return_type using enum
|
||||
args:
|
||||
target:
|
||||
- '$.components.schemas'
|
||||
object:
|
||||
ReturnType:
|
||||
additionalProperties: false
|
||||
properties:
|
||||
type:
|
||||
enum:
|
||||
- string
|
||||
- number
|
||||
- boolean
|
||||
- array
|
||||
- object
|
||||
- json
|
||||
- union
|
||||
- chat_completion_input
|
||||
- completion_input
|
||||
- agent_turn_input
|
||||
required:
|
||||
- type
|
||||
type: object
|
||||
- command: replaceProperties
|
||||
reason: Replace return type properties with better model (see above)
|
||||
args:
|
||||
filter:
|
||||
only:
|
||||
- '$.components.schemas.ScoringFn.properties.return_type'
|
||||
- '$.components.schemas.RegisterScoringFunctionRequest.properties.return_type'
|
||||
value:
|
||||
$ref: '#/components/schemas/ReturnType'
|
||||
- command: oneOfToAnyOf
|
||||
reason: Prism (mock server) doesn't like one of our requests as it technically matches multiple variants
|
||||
- reason: For better names
|
||||
command: extractToRefs
|
||||
args:
|
||||
ref:
|
||||
target: '$.components.schemas.ToolCallDelta.properties.tool_call'
|
||||
name: '#/components/schemas/ToolCallOrString'
|
||||
|
||||
# `readme` is used to configure the code snippets that will be rendered in the
|
||||
# README.md of various SDKs. In particular, you can change the `headline`
|
||||
# snippet's endpoint and the arguments to call it with.
|
||||
readme:
|
||||
example_requests:
|
||||
default:
|
||||
type: request
|
||||
endpoint: post /v1/chat/completions
|
||||
params: &ref_0 {}
|
||||
headline:
|
||||
type: request
|
||||
endpoint: post /v1/models
|
||||
params: *ref_0
|
||||
pagination:
|
||||
type: request
|
||||
endpoint: post /v1/chat/completions
|
||||
params: {}
@@ -15,6 +15,141 @@ info:
servers:
|
||||
- url: http://any-hosted-llama-stack.com
|
||||
paths:
|
||||
/v1/batches:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: A list of batch objects.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ListBatchesResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Batches
|
||||
summary: List all batches for the current user.
|
||||
description: List all batches for the current user.
|
||||
parameters:
|
||||
- name: after
|
||||
in: query
|
||||
description: >-
|
||||
A cursor for pagination; returns batches after this batch ID.
|
||||
required: false
|
||||
schema:
|
||||
type: string
|
||||
- name: limit
|
||||
in: query
|
||||
description: >-
|
||||
Number of batches to return (default 20, max 100).
|
||||
required: true
|
||||
schema:
|
||||
type: integer
|
||||
deprecated: false
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: The created batch object.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/Batch'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Batches
|
||||
summary: >-
|
||||
Create a new batch for processing multiple API requests.
|
||||
description: >-
|
||||
Create a new batch for processing multiple API requests.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/CreateBatchRequest'
|
||||
required: true
|
||||
deprecated: false
|
||||
/v1/batches/{batch_id}:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: The batch object.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/Batch'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Batches
|
||||
summary: >-
|
||||
Retrieve information about a specific batch.
|
||||
description: >-
|
||||
Retrieve information about a specific batch.
|
||||
parameters:
|
||||
- name: batch_id
|
||||
in: path
|
||||
description: The ID of the batch to retrieve.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
deprecated: false
|
||||
/v1/batches/{batch_id}/cancel:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: The updated batch object.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/Batch'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Batches
|
||||
summary: Cancel a batch that is in progress.
|
||||
description: Cancel a batch that is in progress.
|
||||
parameters:
|
||||
- name: batch_id
|
||||
in: path
|
||||
description: The ID of the batch to cancel.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
deprecated: false
|
||||
/v1/chat/completions:
|
||||
get:
|
||||
responses:
@@ -4212,6 +4347,331 @@ components:
title: Error
|
||||
description: >-
|
||||
Error response from the API. Roughly follows RFC 7807.
|
||||
ListBatchesResponse:
|
||||
type: object
|
||||
properties:
|
||||
object:
|
||||
type: string
|
||||
const: list
|
||||
default: list
|
||||
data:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
completion_window:
|
||||
type: string
|
||||
created_at:
|
||||
type: integer
|
||||
endpoint:
|
||||
type: string
|
||||
input_file_id:
|
||||
type: string
|
||||
object:
|
||||
type: string
|
||||
const: batch
|
||||
status:
|
||||
type: string
|
||||
enum:
|
||||
- validating
|
||||
- failed
|
||||
- in_progress
|
||||
- finalizing
|
||||
- completed
|
||||
- expired
|
||||
- cancelling
|
||||
- cancelled
|
||||
cancelled_at:
|
||||
type: integer
|
||||
cancelling_at:
|
||||
type: integer
|
||||
completed_at:
|
||||
type: integer
|
||||
error_file_id:
|
||||
type: string
|
||||
errors:
|
||||
type: object
|
||||
properties:
|
||||
data:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
properties:
|
||||
code:
|
||||
type: string
|
||||
line:
|
||||
type: integer
|
||||
message:
|
||||
type: string
|
||||
param:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
title: BatchError
|
||||
object:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
title: Errors
|
||||
expired_at:
|
||||
type: integer
|
||||
expires_at:
|
||||
type: integer
|
||||
failed_at:
|
||||
type: integer
|
||||
finalizing_at:
|
||||
type: integer
|
||||
in_progress_at:
|
||||
type: integer
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties:
|
||||
type: string
|
||||
model:
|
||||
type: string
|
||||
output_file_id:
|
||||
type: string
|
||||
request_counts:
|
||||
type: object
|
||||
properties:
|
||||
completed:
|
||||
type: integer
|
||||
failed:
|
||||
type: integer
|
||||
total:
|
||||
type: integer
|
||||
additionalProperties: false
|
||||
required:
|
||||
- completed
|
||||
- failed
|
||||
- total
|
||||
title: BatchRequestCounts
|
||||
usage:
|
||||
type: object
|
||||
properties:
|
||||
input_tokens:
|
||||
type: integer
|
||||
input_tokens_details:
|
||||
type: object
|
||||
properties:
|
||||
cached_tokens:
|
||||
type: integer
|
||||
additionalProperties: false
|
||||
required:
|
||||
- cached_tokens
|
||||
title: InputTokensDetails
|
||||
output_tokens:
|
||||
type: integer
|
||||
output_tokens_details:
|
||||
type: object
|
||||
properties:
|
||||
reasoning_tokens:
|
||||
type: integer
|
||||
additionalProperties: false
|
||||
required:
|
||||
- reasoning_tokens
|
||||
title: OutputTokensDetails
|
||||
total_tokens:
|
||||
type: integer
|
||||
additionalProperties: false
|
||||
required:
|
||||
- input_tokens
|
||||
- input_tokens_details
|
||||
- output_tokens
|
||||
- output_tokens_details
|
||||
- total_tokens
|
||||
title: BatchUsage
|
||||
additionalProperties: false
|
||||
required:
|
||||
- id
|
||||
- completion_window
|
||||
- created_at
|
||||
- endpoint
|
||||
- input_file_id
|
||||
- object
|
||||
- status
|
||||
title: Batch
|
||||
first_id:
|
||||
type: string
|
||||
last_id:
|
||||
type: string
|
||||
has_more:
|
||||
type: boolean
|
||||
default: false
|
||||
additionalProperties: false
|
||||
required:
|
||||
- object
|
||||
- data
|
||||
- has_more
|
||||
title: ListBatchesResponse
|
||||
description: >-
|
||||
Response containing a list of batch objects.
|
||||
CreateBatchRequest:
|
||||
type: object
|
||||
properties:
|
||||
input_file_id:
|
||||
type: string
|
||||
description: >-
|
||||
The ID of an uploaded file containing requests for the batch.
|
||||
endpoint:
|
||||
type: string
|
||||
description: >-
|
||||
The endpoint to be used for all requests in the batch.
|
||||
completion_window:
|
||||
type: string
|
||||
const: 24h
|
||||
description: >-
|
||||
The time window within which the batch should be processed.
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties:
|
||||
type: string
|
||||
description: Optional metadata for the batch.
|
||||
idempotency_key:
|
||||
type: string
|
||||
description: >-
|
||||
Optional idempotency key. When provided, enables idempotent behavior.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- input_file_id
|
||||
- endpoint
|
||||
- completion_window
|
||||
title: CreateBatchRequest
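As a usage illustration for the schema above (a hedged sketch: the base URL, file ID, and metadata values are placeholders, not part of this spec), the endpoint is OpenAI-compatible, so the standard `openai` client can drive it:

```python
from openai import OpenAI

# Point the standard OpenAI client at a Llama Stack server (assumed port).
client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

batch = client.batches.create(
    input_file_id="file-abc123",      # placeholder: an uploaded .jsonl of requests
    endpoint="/v1/chat/completions",  # applied to every request in the batch
    completion_window="24h",          # the only value the schema allows (const: 24h)
    metadata={"project": "demo"},     # optional string-to-string metadata
)
print(batch.id, batch.status)
```

The `idempotency_key` field is a Llama Stack extension; with the plain OpenAI client it would presumably travel via `extra_body` rather than a named argument.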
|
||||
Batch:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
completion_window:
|
||||
type: string
|
||||
created_at:
|
||||
type: integer
|
||||
endpoint:
|
||||
type: string
|
||||
input_file_id:
|
||||
type: string
|
||||
object:
|
||||
type: string
|
||||
const: batch
|
||||
status:
|
||||
type: string
|
||||
enum:
|
||||
- validating
|
||||
- failed
|
||||
- in_progress
|
||||
- finalizing
|
||||
- completed
|
||||
- expired
|
||||
- cancelling
|
||||
- cancelled
|
||||
cancelled_at:
|
||||
type: integer
|
||||
cancelling_at:
|
||||
type: integer
|
||||
completed_at:
|
||||
type: integer
|
||||
error_file_id:
|
||||
type: string
|
||||
errors:
|
||||
type: object
|
||||
properties:
|
||||
data:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
properties:
|
||||
code:
|
||||
type: string
|
||||
line:
|
||||
type: integer
|
||||
message:
|
||||
type: string
|
||||
param:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
title: BatchError
|
||||
object:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
title: Errors
|
||||
expired_at:
|
||||
type: integer
|
||||
expires_at:
|
||||
type: integer
|
||||
failed_at:
|
||||
type: integer
|
||||
finalizing_at:
|
||||
type: integer
|
||||
in_progress_at:
|
||||
type: integer
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties:
|
||||
type: string
|
||||
model:
|
||||
type: string
|
||||
output_file_id:
|
||||
type: string
|
||||
request_counts:
|
||||
type: object
|
||||
properties:
|
||||
completed:
|
||||
type: integer
|
||||
failed:
|
||||
type: integer
|
||||
total:
|
||||
type: integer
|
||||
additionalProperties: false
|
||||
required:
|
||||
- completed
|
||||
- failed
|
||||
- total
|
||||
title: BatchRequestCounts
|
||||
usage:
|
||||
type: object
|
||||
properties:
|
||||
input_tokens:
|
||||
type: integer
|
||||
input_tokens_details:
|
||||
type: object
|
||||
properties:
|
||||
cached_tokens:
|
||||
type: integer
|
||||
additionalProperties: false
|
||||
required:
|
||||
- cached_tokens
|
||||
title: InputTokensDetails
|
||||
output_tokens:
|
||||
type: integer
|
||||
output_tokens_details:
|
||||
type: object
|
||||
properties:
|
||||
reasoning_tokens:
|
||||
type: integer
|
||||
additionalProperties: false
|
||||
required:
|
||||
- reasoning_tokens
|
||||
title: OutputTokensDetails
|
||||
total_tokens:
|
||||
type: integer
|
||||
additionalProperties: false
|
||||
required:
|
||||
- input_tokens
|
||||
- input_tokens_details
|
||||
- output_tokens
|
||||
- output_tokens_details
|
||||
- total_tokens
|
||||
title: BatchUsage
|
||||
additionalProperties: false
|
||||
required:
|
||||
- id
|
||||
- completion_window
|
||||
- created_at
|
||||
- endpoint
|
||||
- input_file_id
|
||||
- object
|
||||
- status
|
||||
title: Batch
|
||||
Order:
|
||||
type: string
|
||||
enum:
@@ -5474,11 +5934,44 @@ components:
oneOf:
|
||||
- $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
|
||||
- $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage'
|
||||
- $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile'
|
||||
discriminator:
|
||||
propertyName: type
|
||||
mapping:
|
||||
input_text: '#/components/schemas/OpenAIResponseInputMessageContentText'
|
||||
input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage'
|
||||
input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile'
|
||||
OpenAIResponseInputMessageContentFile:
|
||||
type: object
|
||||
properties:
|
||||
type:
|
||||
type: string
|
||||
const: input_file
|
||||
default: input_file
|
||||
description: >-
|
||||
The type of the input item. Always `input_file`.
|
||||
file_data:
|
||||
type: string
|
||||
description: >-
|
||||
The data of the file to be sent to the model.
|
||||
file_id:
|
||||
type: string
|
||||
description: >-
|
||||
(Optional) The ID of the file to be sent to the model.
|
||||
file_url:
|
||||
type: string
|
||||
description: >-
|
||||
The URL of the file to be sent to the model.
|
||||
filename:
|
||||
type: string
|
||||
description: >-
|
||||
The name of the file to be sent to the model.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- type
|
||||
title: OpenAIResponseInputMessageContentFile
|
||||
description: >-
|
||||
File content for input messages in OpenAI response format.
|
||||
OpenAIResponseInputMessageContentImage:
|
||||
type: object
|
||||
properties:
@@ -5499,6 +5992,10 @@ components:
default: input_image
|
||||
description: >-
|
||||
Content type identifier, always "input_image"
|
||||
file_id:
|
||||
type: string
|
||||
description: >-
|
||||
(Optional) The ID of the file to be sent to the model.
|
||||
image_url:
|
||||
type: string
|
||||
description: (Optional) URL of the image content
@@ -6735,14 +7232,9 @@ components:
Error details for failed OpenAI response requests.
|
||||
OpenAIResponseInput:
|
||||
oneOf:
|
||||
- $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
|
||||
- $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
|
||||
- $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
|
||||
- $ref: '#/components/schemas/OpenAIResponseOutput'
|
||||
- $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
|
||||
- $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
|
||||
- $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
|
||||
- $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
|
||||
- $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
|
||||
- $ref: '#/components/schemas/OpenAIResponseMessage'
|
||||
OpenAIResponseInputToolFileSearch:
|
||||
type: object
@@ -6898,6 +7390,10 @@ components:
type: string
|
||||
description: >-
|
||||
(Optional) ID of the previous response in a conversation
|
||||
prompt:
|
||||
$ref: '#/components/schemas/OpenAIResponsePrompt'
|
||||
description: >-
|
||||
(Optional) Reference to a prompt template and its variables.
|
||||
status:
|
||||
type: string
|
||||
description: >-
@@ -6971,6 +7467,30 @@ components:
mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
|
||||
mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
|
||||
mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
|
||||
OpenAIResponsePrompt:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
description: Unique identifier of the prompt template
|
||||
variables:
|
||||
type: object
|
||||
additionalProperties:
|
||||
$ref: '#/components/schemas/OpenAIResponseInputMessageContent'
|
||||
description: >-
|
||||
Dictionary of variable names to OpenAIResponseInputMessageContent structure
|
||||
for template substitution. The substitution values can either be strings,
|
||||
or other Response input types like images or files.
|
||||
version:
|
||||
type: string
|
||||
description: >-
|
||||
Version number of the prompt to use (defaults to latest if not specified)
|
||||
additionalProperties: false
|
||||
required:
|
||||
- id
|
||||
title: OpenAIResponsePrompt
|
||||
description: >-
|
||||
OpenAI compatible Prompt object that is used in OpenAI responses.
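For illustration (a sketch: the model, prompt ID, and variable names are invented, and the call assumes an OpenAI-compatible Responses endpoint), a request referencing this object looks like:

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")  # assumed server

response = client.responses.create(
    model="meta-llama/Llama-3.2-3B-Instruct",  # placeholder model id
    prompt={
        "id": "prompt-123",              # unique identifier of the prompt template
        "version": "2",                  # optional; latest is used when omitted
        "variables": {"city": "Paris"},  # substituted into the template text
    },
)
print(response.output_text)
```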
|
||||
OpenAIResponseText:
|
||||
type: object
|
||||
properties:
@@ -7228,6 +7748,10 @@ components:
model:
|
||||
type: string
|
||||
description: The underlying LLM used for completions.
|
||||
prompt:
|
||||
$ref: '#/components/schemas/OpenAIResponsePrompt'
|
||||
description: >-
|
||||
(Optional) Prompt object with ID, version, and variables.
|
||||
instructions:
|
||||
type: string
|
||||
previous_response_id:
@@ -7305,6 +7829,10 @@ components:
type: string
|
||||
description: >-
|
||||
(Optional) ID of the previous response in a conversation
|
||||
prompt:
|
||||
$ref: '#/components/schemas/OpenAIResponsePrompt'
|
||||
description: >-
|
||||
(Optional) Reference to a prompt template and its variables.
|
||||
status:
|
||||
type: string
|
||||
description: >-
@@ -9867,7 +10395,7 @@ components:
$ref: '#/components/schemas/RAGDocument'
|
||||
description: >-
|
||||
List of documents to index in the RAG system
|
||||
vector_db_id:
|
||||
vector_store_id:
|
||||
type: string
|
||||
description: >-
|
||||
ID of the vector database to store the document embeddings
@@ -9878,7 +10406,7 @@ components:
additionalProperties: false
|
||||
required:
|
||||
- documents
|
||||
- vector_db_id
|
||||
- vector_store_id
|
||||
- chunk_size_in_tokens
|
||||
title: InsertRequest
|
||||
DefaultRAGQueryGeneratorConfig:
@@ -10049,7 +10577,7 @@ components:
$ref: '#/components/schemas/InterleavedContent'
|
||||
description: >-
|
||||
The query content to search for in the indexed documents
|
||||
vector_db_ids:
|
||||
vector_store_ids:
|
||||
type: array
|
||||
items:
|
||||
type: string
@@ -10062,7 +10590,7 @@ components:
additionalProperties: false
|
||||
required:
|
||||
- content
|
||||
- vector_db_ids
|
||||
- vector_store_ids
|
||||
title: QueryRequest
|
||||
RAGQueryResult:
|
||||
type: object
@@ -10190,6 +10718,10 @@ components:
description: >-
|
||||
The content of the chunk, which can be interleaved text, images, or other
|
||||
types.
|
||||
chunk_id:
|
||||
type: string
|
||||
description: >-
|
||||
Unique identifier for the chunk. Must be provided explicitly.
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties:
@@ -10210,10 +10742,6 @@ components:
description: >-
|
||||
Optional embedding for the chunk. If not provided, it will be computed
|
||||
later.
|
||||
stored_chunk_id:
|
||||
type: string
|
||||
description: >-
|
||||
The chunk ID that is stored in the vector database. Used for backend functionality.
|
||||
chunk_metadata:
|
||||
$ref: '#/components/schemas/ChunkMetadata'
|
||||
description: >-
@@ -10222,6 +10750,7 @@ components:
additionalProperties: false
|
||||
required:
|
||||
- content
|
||||
- chunk_id
|
||||
- metadata
|
||||
title: Chunk
|
||||
description: >-
@@ -10286,7 +10815,7 @@ components:
InsertChunksRequest:
|
||||
type: object
|
||||
properties:
|
||||
vector_db_id:
|
||||
vector_store_id:
|
||||
type: string
|
||||
description: >-
|
||||
The identifier of the vector database to insert the chunks into.
@@ -10305,13 +10834,13 @@ components:
description: The time to live of the chunks.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- vector_db_id
|
||||
- vector_store_id
|
||||
- chunks
|
||||
title: InsertChunksRequest
|
||||
QueryChunksRequest:
|
||||
type: object
|
||||
properties:
|
||||
vector_db_id:
|
||||
vector_store_id:
|
||||
type: string
|
||||
description: >-
|
||||
The identifier of the vector database to query.
@@ -10331,7 +10860,7 @@ components:
description: The parameters of the query.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- vector_db_id
|
||||
- vector_store_id
|
||||
- query
|
||||
title: QueryChunksRequest
|
||||
QueryChunksResponse:
@@ -11600,7 +12129,6 @@ components:
description: The sampling strategy.
|
||||
max_tokens:
|
||||
type: integer
|
||||
default: 0
|
||||
description: >-
|
||||
The maximum number of tokens that can be generated in the completion.
|
||||
The token count of your prompt plus max_tokens cannot exceed the model's
@@ -11850,7 +12378,7 @@ components:
description: Type of the step in an agent turn.
|
||||
const: memory_retrieval
|
||||
default: memory_retrieval
|
||||
vector_db_ids:
|
||||
vector_store_ids:
|
||||
type: string
|
||||
description: >-
|
||||
The IDs of the vector databases to retrieve context from.
@@ -11863,7 +12391,7 @@ components:
- turn_id
|
||||
- step_id
|
||||
- step_type
|
||||
- vector_db_ids
|
||||
- vector_store_ids
|
||||
- inserted_context
|
||||
title: MemoryRetrievalStep
|
||||
description: >-
@@ -13460,6 +13988,19 @@ tags:
description: >-
|
||||
APIs for creating and interacting with agentic systems.
|
||||
x-displayName: Agents
|
||||
- name: Batches
|
||||
description: >-
  The Batches API enables efficient processing of multiple requests in a single
  operation, particularly useful for processing large datasets, batch evaluation
  workflows, and cost-effective inference at scale.

  The API is designed to allow use of openai client libraries for seamless integration.

  This API provides the following extensions:
  - idempotent batch creation

  Note: This API is currently under active development and may undergo changes.
x-displayName: Batches
|
||||
- name: Benchmarks
|
||||
description: ''
|
||||
- name: Conversations
@@ -13534,6 +14075,7 @@ x-tagGroups:
- name: Operations
|
||||
tags:
|
||||
- Agents
|
||||
- Batches
|
||||
- Benchmarks
|
||||
- Conversations
|
||||
- DatasetIO
|
||||
|
|
@@ -58,13 +58,21 @@ storage:
sql_default:
|
||||
type: sql_sqlite
|
||||
db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/sqlstore.db
|
||||
references:
|
||||
stores:
|
||||
metadata:
|
||||
backend: kv_default
|
||||
namespace: registry
|
||||
inference:
|
||||
backend: sql_default
|
||||
table_name: inference_store
|
||||
max_write_queue_size: 10000
|
||||
num_writers: 4
|
||||
conversations:
|
||||
backend: sql_default
|
||||
table_name: openai_conversations
|
||||
prompts:
|
||||
backend: kv_default
|
||||
namespace: prompts
|
||||
models:
|
||||
- metadata: {}
|
||||
model_id: ${env.INFERENCE_MODEL}
|
||||
|
|
@@ -113,13 +113,21 @@ data:
db: ${env.POSTGRES_DB:=llamastack}
|
||||
user: ${env.POSTGRES_USER:=llamastack}
|
||||
password: ${env.POSTGRES_PASSWORD:=llamastack}
|
||||
references:
|
||||
stores:
|
||||
metadata:
|
||||
backend: kv_default
|
||||
namespace: registry
|
||||
inference:
|
||||
backend: sql_default
|
||||
table_name: inference_store
|
||||
max_write_queue_size: 10000
|
||||
num_writers: 4
|
||||
conversations:
|
||||
backend: sql_default
|
||||
table_name: openai_conversations
|
||||
prompts:
|
||||
backend: kv_default
|
||||
namespace: prompts
|
||||
models:
|
||||
- metadata:
|
||||
embedding_dimension: 768
|
||||
|
|
@@ -106,6 +106,9 @@ storage:
conversations:
|
||||
table_name: openai_conversations
|
||||
backend: sql_default
|
||||
prompts:
|
||||
namespace: prompts
|
||||
backend: kv_default
|
||||
registered_resources:
|
||||
models:
|
||||
- metadata:
|
||||
|
|
@@ -79,6 +79,33 @@ docker run \
--port $LLAMA_STACK_PORT
|
||||
```
|
||||
|
||||
### Via Docker with Custom Run Configuration
|
||||
|
||||
You can also run the Docker container with a custom run configuration file by mounting it into the container:
|
||||
|
||||
```bash
|
||||
# Set the path to your custom run.yaml file
|
||||
CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
|
||||
LLAMA_STACK_PORT=8321
|
||||
|
||||
docker run \
|
||||
-it \
|
||||
--pull always \
|
||||
--gpus all \
|
||||
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
||||
-v ~/.llama:/root/.llama \
|
||||
-v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
|
||||
-e RUN_CONFIG_PATH=/app/custom-run.yaml \
|
||||
llamastack/distribution-meta-reference-gpu \
|
||||
--port $LLAMA_STACK_PORT
|
||||
```
|
||||
|
||||
**Note**: The run configuration must be mounted into the container before it can be used. The `-v` flag mounts your local file into the container, and the `RUN_CONFIG_PATH` environment variable tells the entrypoint script which configuration to use. A quick way to verify that the server came up is sketched after the list below.
|
||||
|
||||
Available run configurations for this distribution:
|
||||
- `run.yaml`
|
||||
- `run-with-safety.yaml`
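As a quick smoke test (a sketch assuming the port published above and an installed `llama-stack-client`):

```python
from llama_stack_client import LlamaStackClient

# Matches LLAMA_STACK_PORT=8321 from the docker command above.
client = LlamaStackClient(base_url="http://localhost:8321")
print(client.inspect.health())  # reports the health status of the running stack
```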
|
||||
|
||||
### Via venv
|
||||
|
||||
Make sure you have the Llama Stack CLI available.
|
||||
|
|
@@ -127,13 +127,39 @@ docker run \
-it \
|
||||
--pull always \
|
||||
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
||||
-v ./run.yaml:/root/my-run.yaml \
|
||||
-v ~/.llama:/root/.llama \
|
||||
-e NVIDIA_API_KEY=$NVIDIA_API_KEY \
|
||||
llamastack/distribution-nvidia \
|
||||
--config /root/my-run.yaml \
|
||||
--port $LLAMA_STACK_PORT
|
||||
```
|
||||
|
||||
### Via Docker with Custom Run Configuration
|
||||
|
||||
You can also run the Docker container with a custom run configuration file by mounting it into the container:
|
||||
|
||||
```bash
|
||||
# Set the path to your custom run.yaml file
|
||||
CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
|
||||
LLAMA_STACK_PORT=8321
|
||||
|
||||
docker run \
|
||||
-it \
|
||||
--pull always \
|
||||
-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
|
||||
-v ~/.llama:/root/.llama \
|
||||
-v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
|
||||
-e RUN_CONFIG_PATH=/app/custom-run.yaml \
|
||||
-e NVIDIA_API_KEY=$NVIDIA_API_KEY \
|
||||
llamastack/distribution-nvidia \
|
||||
--port $LLAMA_STACK_PORT
|
||||
```
|
||||
|
||||
**Note**: The run configuration must be mounted into the container before it can be used. The `-v` flag mounts your local file into the container, and the `RUN_CONFIG_PATH` environment variable tells the entrypoint script which configuration to use.
|
||||
|
||||
Available run configurations for this distribution:
|
||||
- `run.yaml`
|
||||
- `run-with-safety.yaml`
|
||||
|
||||
### Via venv
|
||||
|
||||
If you've set up your local development environment, you can also install the distribution dependencies using your local virtual environment.
|
||||
|
|
|
|||
27 docs/docs/providers/files/remote_openai.mdx Normal file
@@ -0,0 +1,27 @@
---
|
||||
description: "OpenAI Files API provider for managing files through OpenAI's native file storage service."
|
||||
sidebar_label: Remote - Openai
|
||||
title: remote::openai
|
||||
---
|
||||
|
||||
# remote::openai
|
||||
|
||||
## Description
|
||||
|
||||
OpenAI Files API provider for managing files through OpenAI's native file storage service.
|
||||
|
||||
## Configuration
|
||||
|
||||
| Field | Type | Required | Default | Description |
|
||||
|-------|------|----------|---------|-------------|
|
||||
| `api_key` | `<class 'str'>` | No | | OpenAI API key for authentication |
|
||||
| `metadata_store` | `<class 'llama_stack.core.storage.datatypes.SqlStoreReference'>` | No | | SQL store configuration for file metadata |
|
||||
|
||||
## Sample Configuration
|
||||
|
||||
```yaml
|
||||
api_key: ${env.OPENAI_API_KEY}
|
||||
metadata_store:
|
||||
table_name: openai_files_metadata
|
||||
backend: sql_default
|
||||
```
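As a hedged usage sketch (the file name and `purpose` value are placeholders; the call assumes a stack running locally with this provider enabled), uploads made through the stack's Files API are stored in OpenAI's file service while their metadata lands in the `openai_files_metadata` table configured above:

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server

# The remote::openai provider forwards storage to OpenAI's Files service.
with open("report.pdf", "rb") as f:
    uploaded = client.files.create(file=f, purpose="batch")
print(uploaded.id)
```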
@@ -20,6 +20,7 @@ NVIDIA inference provider for accessing NVIDIA NIM models and AI services.
| `url` | `<class 'str'>` | No | https://integrate.api.nvidia.com | A base url for accessing the NVIDIA NIM |
|
||||
| `timeout` | `<class 'int'>` | No | 60 | Timeout for the HTTP requests |
|
||||
| `append_api_version` | `<class 'bool'>` | No | True | When set to false, the API version will not be appended to the base_url. By default, it is true. |
|
||||
| `rerank_model_to_url` | `dict[str, str]` | No | `{'nv-rerank-qa-mistral-4b:1': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking', 'nvidia/nv-rerankqa-mistral-4b-v3': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking', 'nvidia/llama-3.2-nv-rerankqa-1b-v2': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking'}` | Mapping of rerank model identifiers to their API endpoints. |
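To show how this mapping is exercised (a sketch: the base URL is assumed, and the parameter names follow the client's alpha rerank method as best understood, so treat them as assumptions):

```python
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server

# The provider resolves this model id to its endpoint via rerank_model_to_url.
result = client.alpha.inference.rerank(
    model="nvidia/llama-3.2-nv-rerankqa-1b-v2",
    query="What is the capital of France?",    # assumed parameter name
    items=[                                     # assumed parameter name
        "Paris is the capital of France.",
        "Berlin is the capital of Germany.",
    ],
)
```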
|
||||
|
||||
## Sample Configuration
|
||||
|
||||
|
|
@@ -72,14 +72,14 @@ description: |
Example with hybrid search:
|
||||
```python
|
||||
response = await vector_io.query_chunks(
|
||||
vector_db_id="my_db",
|
||||
vector_store_id="my_db",
|
||||
query="your query here",
|
||||
params={"mode": "hybrid", "max_chunks": 3, "score_threshold": 0.7},
|
||||
)
|
||||
|
||||
# Using RRF ranker
|
||||
response = await vector_io.query_chunks(
|
||||
vector_db_id="my_db",
|
||||
vector_store_id="my_db",
|
||||
query="your query here",
|
||||
params={
|
||||
"mode": "hybrid",
@@ -91,7 +91,7 @@ description: |
|
||||
# Using weighted ranker
|
||||
response = await vector_io.query_chunks(
|
||||
vector_db_id="my_db",
|
||||
vector_store_id="my_db",
|
||||
query="your query here",
|
||||
params={
|
||||
"mode": "hybrid",
@@ -105,7 +105,7 @@ description: |
Example with explicit vector search:
|
||||
```python
|
||||
response = await vector_io.query_chunks(
|
||||
vector_db_id="my_db",
|
||||
vector_store_id="my_db",
|
||||
query="your query here",
|
||||
params={"mode": "vector", "max_chunks": 3, "score_threshold": 0.7},
|
||||
)
@@ -114,7 +114,7 @@ description: |
Example with keyword search:
|
||||
```python
|
||||
response = await vector_io.query_chunks(
|
||||
vector_db_id="my_db",
|
||||
vector_store_id="my_db",
|
||||
query="your query here",
|
||||
params={"mode": "keyword", "max_chunks": 3, "score_threshold": 0.7},
|
||||
)
@@ -277,14 +277,14 @@ The SQLite-vec provider supports three search modes:
Example with hybrid search:
|
||||
```python
|
||||
response = await vector_io.query_chunks(
|
||||
vector_db_id="my_db",
|
||||
vector_store_id="my_db",
|
||||
query="your query here",
|
||||
params={"mode": "hybrid", "max_chunks": 3, "score_threshold": 0.7},
|
||||
)
|
||||
|
||||
# Using RRF ranker
|
||||
response = await vector_io.query_chunks(
|
||||
vector_db_id="my_db",
|
||||
vector_store_id="my_db",
|
||||
query="your query here",
|
||||
params={
|
||||
"mode": "hybrid",
@@ -296,7 +296,7 @@ response = await vector_io.query_chunks(
|
||||
# Using weighted ranker
|
||||
response = await vector_io.query_chunks(
|
||||
vector_db_id="my_db",
|
||||
vector_store_id="my_db",
|
||||
query="your query here",
|
||||
params={
|
||||
"mode": "hybrid",
@@ -310,7 +310,7 @@ response = await vector_io.query_chunks(
Example with explicit vector search:
|
||||
```python
|
||||
response = await vector_io.query_chunks(
|
||||
vector_db_id="my_db",
|
||||
vector_store_id="my_db",
|
||||
query="your query here",
|
||||
params={"mode": "vector", "max_chunks": 3, "score_threshold": 0.7},
|
||||
)
@@ -319,7 +319,7 @@ response = await vector_io.query_chunks(
Example with keyword search:
|
||||
```python
|
||||
response = await vector_io.query_chunks(
|
||||
vector_db_id="my_db",
|
||||
vector_store_id="my_db",
|
||||
query="your query here",
|
||||
params={"mode": "keyword", "max_chunks": 3, "score_threshold": 0.7},
|
||||
)
|
||||
|
|
|
|||
1036 docs/notebooks/llamastack_agents_getting_started_examples.ipynb Normal file
File diff suppressed because it is too large
@@ -242,15 +242,6 @@ const sidebars: SidebarsConfig = {
'providers/eval/remote_nvidia'
|
||||
],
|
||||
},
|
||||
{
|
||||
type: 'category',
|
||||
label: 'Telemetry',
|
||||
collapsed: true,
|
||||
items: [
|
||||
'providers/telemetry/index',
|
||||
'providers/telemetry/inline_meta-reference'
|
||||
],
|
||||
},
|
||||
{
|
||||
type: 'category',
|
||||
label: 'Batches',
|
||||
|
|
|
|||
741 docs/static/deprecated-llama-stack-spec.html vendored
@@ -1414,6 +1414,193 @@
"deprecated": true
|
||||
}
|
||||
},
|
||||
"/v1/openai/v1/batches": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "A list of batch objects.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/ListBatchesResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Batches"
|
||||
],
|
||||
"summary": "List all batches for the current user.",
|
||||
"description": "List all batches for the current user.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "after",
|
||||
"in": "query",
|
||||
"description": "A cursor for pagination; returns batches after this batch ID.",
|
||||
"required": false,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "limit",
|
||||
"in": "query",
|
||||
"description": "Number of batches to return (default 20, max 100).",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
],
|
||||
"deprecated": true
|
||||
},
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "The created batch object.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/Batch"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Batches"
|
||||
],
|
||||
"summary": "Create a new batch for processing multiple API requests.",
|
||||
"description": "Create a new batch for processing multiple API requests.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/CreateBatchRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
},
|
||||
"deprecated": true
|
||||
}
|
||||
},
|
||||
"/v1/openai/v1/batches/{batch_id}": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "The batch object.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/Batch"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Batches"
|
||||
],
|
||||
"summary": "Retrieve information about a specific batch.",
|
||||
"description": "Retrieve information about a specific batch.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "batch_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the batch to retrieve.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"deprecated": true
|
||||
}
|
||||
},
|
||||
"/v1/openai/v1/batches/{batch_id}/cancel": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "The updated batch object.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/Batch"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Batches"
|
||||
],
|
||||
"summary": "Cancel a batch that is in progress.",
|
||||
"description": "Cancel a batch that is in progress.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "batch_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the batch to cancel.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"deprecated": true
|
||||
}
|
||||
},
|
||||
"/v1/openai/v1/chat/completions": {
|
||||
"get": {
|
||||
"responses": {
@@ -3901,7 +4088,6 @@
},
|
||||
"max_tokens": {
|
||||
"type": "integer",
|
||||
"default": 0,
|
||||
"description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
|
||||
},
|
||||
"repetition_penalty": {
@@ -4391,7 +4577,7 @@
"const": "memory_retrieval",
|
||||
"default": "memory_retrieval"
|
||||
},
|
||||
"vector_db_ids": {
|
||||
"vector_store_ids": {
|
||||
"type": "string",
|
||||
"description": "The IDs of the vector databases to retrieve context from."
|
||||
},
@@ -4405,7 +4591,7 @@
"turn_id",
|
||||
"step_id",
|
||||
"step_type",
|
||||
"vector_db_ids",
|
||||
"vector_store_ids",
|
||||
"inserted_context"
|
||||
],
|
||||
"title": "MemoryRetrievalStep",
@@ -6402,6 +6588,451 @@
"title": "Job",
|
||||
"description": "A job execution instance with status tracking."
|
||||
},
|
||||
"ListBatchesResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"object": {
|
||||
"type": "string",
|
||||
"const": "list",
|
||||
"default": "list"
|
||||
},
|
||||
"data": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string"
|
||||
},
|
||||
"completion_window": {
|
||||
"type": "string"
|
||||
},
|
||||
"created_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"endpoint": {
|
||||
"type": "string"
|
||||
},
|
||||
"input_file_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"object": {
|
||||
"type": "string",
|
||||
"const": "batch"
|
||||
},
|
||||
"status": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"validating",
|
||||
"failed",
|
||||
"in_progress",
|
||||
"finalizing",
|
||||
"completed",
|
||||
"expired",
|
||||
"cancelling",
|
||||
"cancelled"
|
||||
]
|
||||
},
|
||||
"cancelled_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"cancelling_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"completed_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"error_file_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"errors": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"data": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"code": {
|
||||
"type": "string"
|
||||
},
|
||||
"line": {
|
||||
"type": "integer"
|
||||
},
|
||||
"message": {
|
||||
"type": "string"
|
||||
},
|
||||
"param": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"title": "BatchError"
|
||||
}
|
||||
},
|
||||
"object": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"title": "Errors"
|
||||
},
|
||||
"expired_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"expires_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"failed_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"finalizing_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"in_progress_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"model": {
|
||||
"type": "string"
|
||||
},
|
||||
"output_file_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"request_counts": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"completed": {
|
||||
"type": "integer"
|
||||
},
|
||||
"failed": {
|
||||
"type": "integer"
|
||||
},
|
||||
"total": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"completed",
|
||||
"failed",
|
||||
"total"
|
||||
],
|
||||
"title": "BatchRequestCounts"
|
||||
},
|
||||
"usage": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"input_tokens": {
|
||||
"type": "integer"
|
||||
},
|
||||
"input_tokens_details": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"cached_tokens": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"cached_tokens"
|
||||
],
|
||||
"title": "InputTokensDetails"
|
||||
},
|
||||
"output_tokens": {
|
||||
"type": "integer"
|
||||
},
|
||||
"output_tokens_details": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"reasoning_tokens": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"reasoning_tokens"
|
||||
],
|
||||
"title": "OutputTokensDetails"
|
||||
},
|
||||
"total_tokens": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"input_tokens",
|
||||
"input_tokens_details",
|
||||
"output_tokens",
|
||||
"output_tokens_details",
|
||||
"total_tokens"
|
||||
],
|
||||
"title": "BatchUsage"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"id",
|
||||
"completion_window",
|
||||
"created_at",
|
||||
"endpoint",
|
||||
"input_file_id",
|
||||
"object",
|
||||
"status"
|
||||
],
|
||||
"title": "Batch"
|
||||
}
|
||||
},
|
||||
"first_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"last_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"has_more": {
|
||||
"type": "boolean",
|
||||
"default": false
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"object",
|
||||
"data",
|
||||
"has_more"
|
||||
],
|
||||
"title": "ListBatchesResponse",
|
||||
"description": "Response containing a list of batch objects."
|
||||
},
|
||||
"CreateBatchRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"input_file_id": {
|
||||
"type": "string",
|
||||
"description": "The ID of an uploaded file containing requests for the batch."
|
||||
},
|
||||
"endpoint": {
|
||||
"type": "string",
|
||||
"description": "The endpoint to be used for all requests in the batch."
|
||||
},
|
||||
"completion_window": {
|
||||
"type": "string",
|
||||
"const": "24h",
|
||||
"description": "The time window within which the batch should be processed."
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": "Optional metadata for the batch."
|
||||
},
|
||||
"idempotency_key": {
|
||||
"type": "string",
|
||||
"description": "Optional idempotency key. When provided, enables idempotent behavior."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"input_file_id",
|
||||
"endpoint",
|
||||
"completion_window"
|
||||
],
|
||||
"title": "CreateBatchRequest"
|
||||
},
|
||||
"Batch": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string"
|
||||
},
|
||||
"completion_window": {
|
||||
"type": "string"
|
||||
},
|
||||
"created_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"endpoint": {
|
||||
"type": "string"
|
||||
},
|
||||
"input_file_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"object": {
|
||||
"type": "string",
|
||||
"const": "batch"
|
||||
},
|
||||
"status": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"validating",
|
||||
"failed",
|
||||
"in_progress",
|
||||
"finalizing",
|
||||
"completed",
|
||||
"expired",
|
||||
"cancelling",
|
||||
"cancelled"
|
||||
]
|
||||
},
|
||||
"cancelled_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"cancelling_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"completed_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"error_file_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"errors": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"data": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"code": {
|
||||
"type": "string"
|
||||
},
|
||||
"line": {
|
||||
"type": "integer"
|
||||
},
|
||||
"message": {
|
||||
"type": "string"
|
||||
},
|
||||
"param": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"title": "BatchError"
|
||||
}
|
||||
},
|
||||
"object": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"title": "Errors"
|
||||
},
|
||||
"expired_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"expires_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"failed_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"finalizing_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"in_progress_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"model": {
|
||||
"type": "string"
|
||||
},
|
||||
"output_file_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"request_counts": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"completed": {
|
||||
"type": "integer"
|
||||
},
|
||||
"failed": {
|
||||
"type": "integer"
|
||||
},
|
||||
"total": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"completed",
|
||||
"failed",
|
||||
"total"
|
||||
],
|
||||
"title": "BatchRequestCounts"
|
||||
},
|
||||
"usage": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"input_tokens": {
|
||||
"type": "integer"
|
||||
},
|
||||
"input_tokens_details": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"cached_tokens": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"cached_tokens"
|
||||
],
|
||||
"title": "InputTokensDetails"
|
||||
},
|
||||
"output_tokens": {
|
||||
"type": "integer"
|
||||
},
|
||||
"output_tokens_details": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"reasoning_tokens": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"reasoning_tokens"
|
||||
],
|
||||
"title": "OutputTokensDetails"
|
||||
},
|
||||
"total_tokens": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"input_tokens",
|
||||
"input_tokens_details",
|
||||
"output_tokens",
|
||||
"output_tokens_details",
|
||||
"total_tokens"
|
||||
],
|
||||
"title": "BatchUsage"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"id",
|
||||
"completion_window",
|
||||
"created_at",
|
||||
"endpoint",
|
||||
"input_file_id",
|
||||
"object",
|
||||
"status"
|
||||
],
|
||||
"title": "Batch"
|
||||
},
|
||||
"Order": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
|
|
@@ -8527,29 +9158,14 @@
      "OpenAIResponseInput": {
        "oneOf": [
-          {
-            "$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
-          },
-          {
-            "$ref": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall"
-          },
          {
-            "$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall"
+            "$ref": "#/components/schemas/OpenAIResponseOutput"
          },
          {
            "$ref": "#/components/schemas/OpenAIResponseInputFunctionToolCallOutput"
          },
-          {
-            "$ref": "#/components/schemas/OpenAIResponseMCPApprovalRequest"
-          },
          {
            "$ref": "#/components/schemas/OpenAIResponseMCPApprovalResponse"
          },
-          {
-            "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPCall"
-          },
-          {
-            "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools"
-          },
          {
            "$ref": "#/components/schemas/OpenAIResponseMessage"
          }
@@ -8592,16 +9208,53 @@
          },
          {
            "$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage"
+          },
+          {
+            "$ref": "#/components/schemas/OpenAIResponseInputMessageContentFile"
          }
        ],
        "discriminator": {
          "propertyName": "type",
          "mapping": {
            "input_text": "#/components/schemas/OpenAIResponseInputMessageContentText",
-            "input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage"
+            "input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage",
+            "input_file": "#/components/schemas/OpenAIResponseInputMessageContentFile"
          }
        }
      },
+      "OpenAIResponseInputMessageContentFile": {
+        "type": "object",
+        "properties": {
+          "type": {
+            "type": "string",
+            "const": "input_file",
+            "default": "input_file",
+            "description": "The type of the input item. Always `input_file`."
+          },
+          "file_data": {
+            "type": "string",
+            "description": "The data of the file to be sent to the model."
+          },
+          "file_id": {
+            "type": "string",
+            "description": "(Optional) The ID of the file to be sent to the model."
+          },
+          "file_url": {
+            "type": "string",
+            "description": "The URL of the file to be sent to the model."
+          },
+          "filename": {
+            "type": "string",
+            "description": "The name of the file to be sent to the model."
+          }
+        },
+        "additionalProperties": false,
+        "required": [
+          "type"
+        ],
+        "title": "OpenAIResponseInputMessageContentFile",
+        "description": "File content for input messages in OpenAI response format."
+      },
      "OpenAIResponseInputMessageContentImage": {
        "type": "object",
        "properties": {
@@ -8629,6 +9282,10 @@
            "default": "input_image",
            "description": "Content type identifier, always \"input_image\""
          },
+          "file_id": {
+            "type": "string",
+            "description": "(Optional) The ID of the file to be sent to the model."
+          },
          "image_url": {
            "type": "string",
            "description": "(Optional) URL of the image content"
@@ -8992,6 +9649,10 @@
          "type": "string",
          "description": "(Optional) ID of the previous response in a conversation"
        },
+        "prompt": {
+          "$ref": "#/components/schemas/OpenAIResponsePrompt",
+          "description": "(Optional) Reference to a prompt template and its variables."
+        },
        "status": {
          "type": "string",
          "description": "Current status of the response generation"
@@ -9416,6 +10077,32 @@
|
|||
"title": "OpenAIResponseOutputMessageWebSearchToolCall",
|
||||
"description": "Web search tool call output message for OpenAI responses."
|
||||
},
|
||||
"OpenAIResponsePrompt": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "Unique identifier of the prompt template"
|
||||
},
|
||||
"variables": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"$ref": "#/components/schemas/OpenAIResponseInputMessageContent"
|
||||
},
|
||||
"description": "Dictionary of variable names to OpenAIResponseInputMessageContent structure for template substitution. The substitution values can either be strings, or other Response input types like images or files."
|
||||
},
|
||||
"version": {
|
||||
"type": "string",
|
||||
"description": "Version number of the prompt to use (defaults to latest if not specified)"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"id"
|
||||
],
|
||||
"title": "OpenAIResponsePrompt",
|
||||
"description": "OpenAI compatible Prompt object that is used in OpenAI responses."
|
||||
},
|
||||
"OpenAIResponseText": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
|
@@ -9786,6 +10473,10 @@
          "type": "string",
          "description": "The underlying LLM used for completions."
        },
+        "prompt": {
+          "$ref": "#/components/schemas/OpenAIResponsePrompt",
+          "description": "(Optional) Prompt object with ID, version, and variables."
+        },
        "instructions": {
          "type": "string"
        },
@@ -9874,6 +10565,10 @@
          "type": "string",
          "description": "(Optional) ID of the previous response in a conversation"
        },
+        "prompt": {
+          "$ref": "#/components/schemas/OpenAIResponsePrompt",
+          "description": "(Optional) Reference to a prompt template and its variables."
+        },
        "status": {
          "type": "string",
          "description": "Current status of the response generation"
@@ -13442,6 +14137,11 @@
      "description": "APIs for creating and interacting with agentic systems.\n\n## Deprecated APIs\n\n> **⚠️ DEPRECATED**: These APIs are provided for migration reference and will be removed in future versions. Not recommended for new projects.\n\n### Migration Guidance\n\nIf you are using deprecated versions of the Agents or Responses APIs, please migrate to:\n\n- **Responses API**: Use the stable v1 Responses API endpoints\n",
      "x-displayName": "Agents"
    },
+    {
+      "name": "Batches",
+      "description": "The API is designed to allow use of openai client libraries for seamless integration.\n\nThis API provides the following extensions:\n - idempotent batch creation\n\nNote: This API is currently under active development and may undergo changes.",
+      "x-displayName": "The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale."
+    },
    {
      "name": "Benchmarks",
      "description": ""
@@ -13492,6 +14192,7 @@
      "name": "Operations",
      "tags": [
        "Agents",
+        "Batches",
        "Benchmarks",
        "DatasetIO",
        "Datasets",
559
docs/static/deprecated-llama-stack-spec.yaml
vendored
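The YAML spec below mirrors the Batches paths added to the HTML spec above. Since the tag description says the API is designed for stock OpenAI client libraries, a minimal sketch of creating a batch against these deprecated /v1/openai/v1 routes might look like the following; the base URL, API key, and file ID are placeholder assumptions, not part of this commit:

from openai import OpenAI

# Assumes a locally running Llama Stack server; "file-abc123" stands in for
# an already-uploaded JSONL file of batch requests.
client = OpenAI(base_url="http://localhost:8321/v1/openai/v1", api_key="none")

batch = client.batches.create(
    input_file_id="file-abc123",        # ID of an uploaded file containing requests
    endpoint="/v1/chat/completions",    # endpoint used for all requests in the batch
    completion_window="24h",            # the schema only allows the const "24h"
    metadata={"project": "demo"},       # optional string-to-string metadata
)
print(batch.id, batch.status)           # status starts as "validating"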
@@ -1012,6 +1012,141 @@ paths:
|
|||
schema:
|
||||
type: string
|
||||
deprecated: true
|
||||
/v1/openai/v1/batches:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: A list of batch objects.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/ListBatchesResponse'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Batches
|
||||
summary: List all batches for the current user.
|
||||
description: List all batches for the current user.
|
||||
parameters:
|
||||
- name: after
|
||||
in: query
|
||||
description: >-
|
||||
A cursor for pagination; returns batches after this batch ID.
|
||||
required: false
|
||||
schema:
|
||||
type: string
|
||||
- name: limit
|
||||
in: query
|
||||
description: >-
|
||||
Number of batches to return (default 20, max 100).
|
||||
required: true
|
||||
schema:
|
||||
type: integer
|
||||
deprecated: true
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: The created batch object.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/Batch'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Batches
|
||||
summary: >-
|
||||
Create a new batch for processing multiple API requests.
|
||||
description: >-
|
||||
Create a new batch for processing multiple API requests.
|
||||
parameters: []
|
||||
requestBody:
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/CreateBatchRequest'
|
||||
required: true
|
||||
deprecated: true
|
||||
/v1/openai/v1/batches/{batch_id}:
|
||||
get:
|
||||
responses:
|
||||
'200':
|
||||
description: The batch object.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/Batch'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Batches
|
||||
summary: >-
|
||||
Retrieve information about a specific batch.
|
||||
description: >-
|
||||
Retrieve information about a specific batch.
|
||||
parameters:
|
||||
- name: batch_id
|
||||
in: path
|
||||
description: The ID of the batch to retrieve.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
deprecated: true
|
||||
/v1/openai/v1/batches/{batch_id}/cancel:
|
||||
post:
|
||||
responses:
|
||||
'200':
|
||||
description: The updated batch object.
|
||||
content:
|
||||
application/json:
|
||||
schema:
|
||||
$ref: '#/components/schemas/Batch'
|
||||
'400':
|
||||
$ref: '#/components/responses/BadRequest400'
|
||||
'429':
|
||||
$ref: >-
|
||||
#/components/responses/TooManyRequests429
|
||||
'500':
|
||||
$ref: >-
|
||||
#/components/responses/InternalServerError500
|
||||
default:
|
||||
$ref: '#/components/responses/DefaultError'
|
||||
tags:
|
||||
- Batches
|
||||
summary: Cancel a batch that is in progress.
|
||||
description: Cancel a batch that is in progress.
|
||||
parameters:
|
||||
- name: batch_id
|
||||
in: path
|
||||
description: The ID of the batch to cancel.
|
||||
required: true
|
||||
schema:
|
||||
type: string
|
||||
deprecated: true
|
||||
/v1/openai/v1/chat/completions:
|
||||
get:
|
||||
responses:
|
||||
|
|
@@ -2862,7 +2997,6 @@ components:
        description: The sampling strategy.
      max_tokens:
        type: integer
-        default: 0
        description: >-
          The maximum number of tokens that can be generated in the completion.
          The token count of your prompt plus max_tokens cannot exceed the model's
@@ -3253,7 +3387,7 @@ components:
        description: Type of the step in an agent turn.
        const: memory_retrieval
        default: memory_retrieval
-      vector_db_ids:
+      vector_store_ids:
        type: string
        description: >-
          The IDs of the vector databases to retrieve context from.
@@ -3266,7 +3400,7 @@ components:
      - turn_id
      - step_id
      - step_type
-      - vector_db_ids
+      - vector_store_ids
      - inserted_context
      title: MemoryRetrievalStep
      description: >-
@@ -4737,6 +4871,331 @@ components:
|
|||
title: Job
|
||||
description: >-
|
||||
A job execution instance with status tracking.
|
||||
ListBatchesResponse:
|
||||
type: object
|
||||
properties:
|
||||
object:
|
||||
type: string
|
||||
const: list
|
||||
default: list
|
||||
data:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
completion_window:
|
||||
type: string
|
||||
created_at:
|
||||
type: integer
|
||||
endpoint:
|
||||
type: string
|
||||
input_file_id:
|
||||
type: string
|
||||
object:
|
||||
type: string
|
||||
const: batch
|
||||
status:
|
||||
type: string
|
||||
enum:
|
||||
- validating
|
||||
- failed
|
||||
- in_progress
|
||||
- finalizing
|
||||
- completed
|
||||
- expired
|
||||
- cancelling
|
||||
- cancelled
|
||||
cancelled_at:
|
||||
type: integer
|
||||
cancelling_at:
|
||||
type: integer
|
||||
completed_at:
|
||||
type: integer
|
||||
error_file_id:
|
||||
type: string
|
||||
errors:
|
||||
type: object
|
||||
properties:
|
||||
data:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
properties:
|
||||
code:
|
||||
type: string
|
||||
line:
|
||||
type: integer
|
||||
message:
|
||||
type: string
|
||||
param:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
title: BatchError
|
||||
object:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
title: Errors
|
||||
expired_at:
|
||||
type: integer
|
||||
expires_at:
|
||||
type: integer
|
||||
failed_at:
|
||||
type: integer
|
||||
finalizing_at:
|
||||
type: integer
|
||||
in_progress_at:
|
||||
type: integer
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties:
|
||||
type: string
|
||||
model:
|
||||
type: string
|
||||
output_file_id:
|
||||
type: string
|
||||
request_counts:
|
||||
type: object
|
||||
properties:
|
||||
completed:
|
||||
type: integer
|
||||
failed:
|
||||
type: integer
|
||||
total:
|
||||
type: integer
|
||||
additionalProperties: false
|
||||
required:
|
||||
- completed
|
||||
- failed
|
||||
- total
|
||||
title: BatchRequestCounts
|
||||
usage:
|
||||
type: object
|
||||
properties:
|
||||
input_tokens:
|
||||
type: integer
|
||||
input_tokens_details:
|
||||
type: object
|
||||
properties:
|
||||
cached_tokens:
|
||||
type: integer
|
||||
additionalProperties: false
|
||||
required:
|
||||
- cached_tokens
|
||||
title: InputTokensDetails
|
||||
output_tokens:
|
||||
type: integer
|
||||
output_tokens_details:
|
||||
type: object
|
||||
properties:
|
||||
reasoning_tokens:
|
||||
type: integer
|
||||
additionalProperties: false
|
||||
required:
|
||||
- reasoning_tokens
|
||||
title: OutputTokensDetails
|
||||
total_tokens:
|
||||
type: integer
|
||||
additionalProperties: false
|
||||
required:
|
||||
- input_tokens
|
||||
- input_tokens_details
|
||||
- output_tokens
|
||||
- output_tokens_details
|
||||
- total_tokens
|
||||
title: BatchUsage
|
||||
additionalProperties: false
|
||||
required:
|
||||
- id
|
||||
- completion_window
|
||||
- created_at
|
||||
- endpoint
|
||||
- input_file_id
|
||||
- object
|
||||
- status
|
||||
title: Batch
|
||||
first_id:
|
||||
type: string
|
||||
last_id:
|
||||
type: string
|
||||
has_more:
|
||||
type: boolean
|
||||
default: false
|
||||
additionalProperties: false
|
||||
required:
|
||||
- object
|
||||
- data
|
||||
- has_more
|
||||
title: ListBatchesResponse
|
||||
description: >-
|
||||
Response containing a list of batch objects.
|
||||
CreateBatchRequest:
|
||||
type: object
|
||||
properties:
|
||||
input_file_id:
|
||||
type: string
|
||||
description: >-
|
||||
The ID of an uploaded file containing requests for the batch.
|
||||
endpoint:
|
||||
type: string
|
||||
description: >-
|
||||
The endpoint to be used for all requests in the batch.
|
||||
completion_window:
|
||||
type: string
|
||||
const: 24h
|
||||
description: >-
|
||||
The time window within which the batch should be processed.
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties:
|
||||
type: string
|
||||
description: Optional metadata for the batch.
|
||||
idempotency_key:
|
||||
type: string
|
||||
description: >-
|
||||
Optional idempotency key. When provided, enables idempotent behavior.
|
||||
additionalProperties: false
|
||||
required:
|
||||
- input_file_id
|
||||
- endpoint
|
||||
- completion_window
|
||||
title: CreateBatchRequest
|
||||
Batch:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
completion_window:
|
||||
type: string
|
||||
created_at:
|
||||
type: integer
|
||||
endpoint:
|
||||
type: string
|
||||
input_file_id:
|
||||
type: string
|
||||
object:
|
||||
type: string
|
||||
const: batch
|
||||
status:
|
||||
type: string
|
||||
enum:
|
||||
- validating
|
||||
- failed
|
||||
- in_progress
|
||||
- finalizing
|
||||
- completed
|
||||
- expired
|
||||
- cancelling
|
||||
- cancelled
|
||||
cancelled_at:
|
||||
type: integer
|
||||
cancelling_at:
|
||||
type: integer
|
||||
completed_at:
|
||||
type: integer
|
||||
error_file_id:
|
||||
type: string
|
||||
errors:
|
||||
type: object
|
||||
properties:
|
||||
data:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
properties:
|
||||
code:
|
||||
type: string
|
||||
line:
|
||||
type: integer
|
||||
message:
|
||||
type: string
|
||||
param:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
title: BatchError
|
||||
object:
|
||||
type: string
|
||||
additionalProperties: false
|
||||
title: Errors
|
||||
expired_at:
|
||||
type: integer
|
||||
expires_at:
|
||||
type: integer
|
||||
failed_at:
|
||||
type: integer
|
||||
finalizing_at:
|
||||
type: integer
|
||||
in_progress_at:
|
||||
type: integer
|
||||
metadata:
|
||||
type: object
|
||||
additionalProperties:
|
||||
type: string
|
||||
model:
|
||||
type: string
|
||||
output_file_id:
|
||||
type: string
|
||||
request_counts:
|
||||
type: object
|
||||
properties:
|
||||
completed:
|
||||
type: integer
|
||||
failed:
|
||||
type: integer
|
||||
total:
|
||||
type: integer
|
||||
additionalProperties: false
|
||||
required:
|
||||
- completed
|
||||
- failed
|
||||
- total
|
||||
title: BatchRequestCounts
|
||||
usage:
|
||||
type: object
|
||||
properties:
|
||||
input_tokens:
|
||||
type: integer
|
||||
input_tokens_details:
|
||||
type: object
|
||||
properties:
|
||||
cached_tokens:
|
||||
type: integer
|
||||
additionalProperties: false
|
||||
required:
|
||||
- cached_tokens
|
||||
title: InputTokensDetails
|
||||
output_tokens:
|
||||
type: integer
|
||||
output_tokens_details:
|
||||
type: object
|
||||
properties:
|
||||
reasoning_tokens:
|
||||
type: integer
|
||||
additionalProperties: false
|
||||
required:
|
||||
- reasoning_tokens
|
||||
title: OutputTokensDetails
|
||||
total_tokens:
|
||||
type: integer
|
||||
additionalProperties: false
|
||||
required:
|
||||
- input_tokens
|
||||
- input_tokens_details
|
||||
- output_tokens
|
||||
- output_tokens_details
|
||||
- total_tokens
|
||||
title: BatchUsage
|
||||
additionalProperties: false
|
||||
required:
|
||||
- id
|
||||
- completion_window
|
||||
- created_at
|
||||
- endpoint
|
||||
- input_file_id
|
||||
- object
|
||||
- status
|
||||
title: Batch
|
||||
Order:
|
||||
type: string
|
||||
enum:
|
||||
|
|
@@ -6370,14 +6829,9 @@ components:
        Error details for failed OpenAI response requests.
    OpenAIResponseInput:
      oneOf:
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+        - $ref: '#/components/schemas/OpenAIResponseOutput'
        - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
-        - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
        - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
        - $ref: '#/components/schemas/OpenAIResponseMessage'
    OpenAIResponseInputFunctionToolCallOutput:
      type: object
@@ -6408,11 +6862,44 @@ components:
      oneOf:
        - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
        - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage'
+        - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile'
      discriminator:
        propertyName: type
        mapping:
          input_text: '#/components/schemas/OpenAIResponseInputMessageContentText'
          input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage'
+          input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile'
+    OpenAIResponseInputMessageContentFile:
+      type: object
+      properties:
+        type:
+          type: string
+          const: input_file
+          default: input_file
+          description: >-
+            The type of the input item. Always `input_file`.
+        file_data:
+          type: string
+          description: >-
+            The data of the file to be sent to the model.
+        file_id:
+          type: string
+          description: >-
+            (Optional) The ID of the file to be sent to the model.
+        file_url:
+          type: string
+          description: >-
+            The URL of the file to be sent to the model.
+        filename:
+          type: string
+          description: >-
+            The name of the file to be sent to the model.
+      additionalProperties: false
+      required:
+        - type
+      title: OpenAIResponseInputMessageContentFile
+      description: >-
+        File content for input messages in OpenAI response format.
    OpenAIResponseInputMessageContentImage:
      type: object
      properties:
@@ -6433,6 +6920,10 @@ components:
          default: input_image
          description: >-
            Content type identifier, always "input_image"
+        file_id:
+          type: string
+          description: >-
+            (Optional) The ID of the file to be sent to the model.
        image_url:
          type: string
          description: (Optional) URL of the image content
@@ -6703,6 +7194,10 @@ components:
          type: string
          description: >-
            (Optional) ID of the previous response in a conversation
+        prompt:
+          $ref: '#/components/schemas/OpenAIResponsePrompt'
+          description: >-
+            (Optional) Reference to a prompt template and its variables.
        status:
          type: string
          description: >-
@@ -7042,6 +7537,30 @@ components:
|
|||
OpenAIResponseOutputMessageWebSearchToolCall
|
||||
description: >-
|
||||
Web search tool call output message for OpenAI responses.
|
||||
OpenAIResponsePrompt:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
description: Unique identifier of the prompt template
|
||||
variables:
|
||||
type: object
|
||||
additionalProperties:
|
||||
$ref: '#/components/schemas/OpenAIResponseInputMessageContent'
|
||||
description: >-
|
||||
Dictionary of variable names to OpenAIResponseInputMessageContent structure
|
||||
for template substitution. The substitution values can either be strings,
|
||||
or other Response input types like images or files.
|
||||
version:
|
||||
type: string
|
||||
description: >-
|
||||
Version number of the prompt to use (defaults to latest if not specified)
|
||||
additionalProperties: false
|
||||
required:
|
||||
- id
|
||||
title: OpenAIResponsePrompt
|
||||
description: >-
|
||||
OpenAI compatible Prompt object that is used in OpenAI responses.
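# The OpenAIResponsePrompt schema above is what the `prompt` fields added to
# the Responses request and response bodies later in this diff refer to. As an
# illustrative sketch only (the prompt ID, version, variable values, and model
# name are hypothetical, not from this commit), a request body carrying a
# prompt reference could be assembled like this:

request_body = {
    "model": "llama3.2:3b",                # placeholder model id
    "prompt": {
        "id": "pmpt_123",                  # unique identifier of the prompt template
        "version": "2",                    # optional; defaults to latest
        "variables": {
            # values are OpenAIResponseInputMessageContent objects
            "city": {"type": "input_text", "text": "Tokyo"},
        },
    },
}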
|
||||
OpenAIResponseText:
|
||||
type: object
|
||||
properties:
|
||||
|
|
@@ -7299,6 +7818,10 @@ components:
        model:
          type: string
          description: The underlying LLM used for completions.
+        prompt:
+          $ref: '#/components/schemas/OpenAIResponsePrompt'
+          description: >-
+            (Optional) Prompt object with ID, version, and variables.
        instructions:
          type: string
        previous_response_id:
@@ -7376,6 +7899,10 @@ components:
          type: string
          description: >-
            (Optional) ID of the previous response in a conversation
+        prompt:
+          $ref: '#/components/schemas/OpenAIResponsePrompt'
+          description: >-
+            (Optional) Reference to a prompt template and its variables.
        status:
          type: string
          description: >-
@@ -10196,6 +10723,19 @@ tags:
|
|||
|
||||
- **Responses API**: Use the stable v1 Responses API endpoints
|
||||
x-displayName: Agents
|
||||
- name: Batches
|
||||
description: >-
|
||||
The API is designed to allow use of openai client libraries for seamless integration.
|
||||
|
||||
|
||||
This API provides the following extensions:
|
||||
- idempotent batch creation
|
||||
|
||||
Note: This API is currently under active development and may undergo changes.
|
||||
x-displayName: >-
|
||||
The Batches API enables efficient processing of multiple requests in a single
|
||||
operation, particularly useful for processing large datasets, batch evaluation
|
||||
workflows, and cost-effective inference at scale.
|
||||
- name: Benchmarks
|
||||
description: ''
|
||||
- name: DatasetIO
|
||||
|
|
@@ -10241,6 +10781,7 @@ x-tagGroups:
  - name: Operations
    tags:
      - Agents
+      - Batches
      - Benchmarks
      - DatasetIO
      - Datasets
@@ -2376,7 +2376,6 @@
          },
          "max_tokens": {
            "type": "integer",
-            "default": 0,
            "description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
          },
          "repetition_penalty": {
@@ -2866,7 +2865,7 @@
            "const": "memory_retrieval",
            "default": "memory_retrieval"
          },
-          "vector_db_ids": {
+          "vector_store_ids": {
            "type": "string",
            "description": "The IDs of the vector databases to retrieve context from."
          },
@@ -2880,7 +2879,7 @@
          "turn_id",
          "step_id",
          "step_type",
-          "vector_db_ids",
+          "vector_store_ids",
          "inserted_context"
        ],
        "title": "MemoryRetrievalStep",
@@ -1695,7 +1695,6 @@ components:
        description: The sampling strategy.
      max_tokens:
        type: integer
-        default: 0
        description: >-
          The maximum number of tokens that can be generated in the completion.
          The token count of your prompt plus max_tokens cannot exceed the model's
@@ -2086,7 +2085,7 @@ components:
        description: Type of the step in an agent turn.
        const: memory_retrieval
        default: memory_retrieval
-      vector_db_ids:
+      vector_store_ids:
        type: string
        description: >-
          The IDs of the vector databases to retrieve context from.
@@ -2099,7 +2098,7 @@ components:
      - turn_id
      - step_id
      - step_type
-      - vector_db_ids
+      - vector_store_ids
      - inserted_context
      title: MemoryRetrievalStep
      description: >-
761
docs/static/llama-stack-spec.html
vendored
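In the stable spec below, the same Batches operations move to /v1/batches (no /openai/v1 prefix) and are no longer marked deprecated. A rough list/poll/cancel sketch against those routes, under the same placeholder assumptions as the earlier snippet:

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

# List batches; the spec paginates with the "after" cursor and "limit".
for b in client.batches.list(limit=20).data:
    print(b.id, b.status)

# Retrieve one batch and cancel it if it is still running.
batch = client.batches.retrieve("batch_xyz789")   # placeholder batch ID
if batch.status in ("validating", "in_progress", "finalizing"):
    batch = client.batches.cancel(batch.id)       # -> "cancelling"/"cancelled"
print(batch.status)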
@@ -40,6 +40,193 @@
|
|||
}
|
||||
],
|
||||
"paths": {
|
||||
"/v1/batches": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "A list of batch objects.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/ListBatchesResponse"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Batches"
|
||||
],
|
||||
"summary": "List all batches for the current user.",
|
||||
"description": "List all batches for the current user.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "after",
|
||||
"in": "query",
|
||||
"description": "A cursor for pagination; returns batches after this batch ID.",
|
||||
"required": false,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
{
|
||||
"name": "limit",
|
||||
"in": "query",
|
||||
"description": "Number of batches to return (default 20, max 100).",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "integer"
|
||||
}
|
||||
}
|
||||
],
|
||||
"deprecated": false
|
||||
},
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "The created batch object.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/Batch"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Batches"
|
||||
],
|
||||
"summary": "Create a new batch for processing multiple API requests.",
|
||||
"description": "Create a new batch for processing multiple API requests.",
|
||||
"parameters": [],
|
||||
"requestBody": {
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/CreateBatchRequest"
|
||||
}
|
||||
}
|
||||
},
|
||||
"required": true
|
||||
},
|
||||
"deprecated": false
|
||||
}
|
||||
},
|
||||
"/v1/batches/{batch_id}": {
|
||||
"get": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "The batch object.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/Batch"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Batches"
|
||||
],
|
||||
"summary": "Retrieve information about a specific batch.",
|
||||
"description": "Retrieve information about a specific batch.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "batch_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the batch to retrieve.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"deprecated": false
|
||||
}
|
||||
},
|
||||
"/v1/batches/{batch_id}/cancel": {
|
||||
"post": {
|
||||
"responses": {
|
||||
"200": {
|
||||
"description": "The updated batch object.",
|
||||
"content": {
|
||||
"application/json": {
|
||||
"schema": {
|
||||
"$ref": "#/components/schemas/Batch"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"400": {
|
||||
"$ref": "#/components/responses/BadRequest400"
|
||||
},
|
||||
"429": {
|
||||
"$ref": "#/components/responses/TooManyRequests429"
|
||||
},
|
||||
"500": {
|
||||
"$ref": "#/components/responses/InternalServerError500"
|
||||
},
|
||||
"default": {
|
||||
"$ref": "#/components/responses/DefaultError"
|
||||
}
|
||||
},
|
||||
"tags": [
|
||||
"Batches"
|
||||
],
|
||||
"summary": "Cancel a batch that is in progress.",
|
||||
"description": "Cancel a batch that is in progress.",
|
||||
"parameters": [
|
||||
{
|
||||
"name": "batch_id",
|
||||
"in": "path",
|
||||
"description": "The ID of the batch to cancel.",
|
||||
"required": true,
|
||||
"schema": {
|
||||
"type": "string"
|
||||
}
|
||||
}
|
||||
],
|
||||
"deprecated": false
|
||||
}
|
||||
},
|
||||
"/v1/chat/completions": {
|
||||
"get": {
|
||||
"responses": {
|
||||
|
|
@@ -4005,6 +4192,451 @@
|
|||
"title": "Error",
|
||||
"description": "Error response from the API. Roughly follows RFC 7807."
|
||||
},
|
||||
"ListBatchesResponse": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"object": {
|
||||
"type": "string",
|
||||
"const": "list",
|
||||
"default": "list"
|
||||
},
|
||||
"data": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string"
|
||||
},
|
||||
"completion_window": {
|
||||
"type": "string"
|
||||
},
|
||||
"created_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"endpoint": {
|
||||
"type": "string"
|
||||
},
|
||||
"input_file_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"object": {
|
||||
"type": "string",
|
||||
"const": "batch"
|
||||
},
|
||||
"status": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"validating",
|
||||
"failed",
|
||||
"in_progress",
|
||||
"finalizing",
|
||||
"completed",
|
||||
"expired",
|
||||
"cancelling",
|
||||
"cancelled"
|
||||
]
|
||||
},
|
||||
"cancelled_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"cancelling_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"completed_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"error_file_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"errors": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"data": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"code": {
|
||||
"type": "string"
|
||||
},
|
||||
"line": {
|
||||
"type": "integer"
|
||||
},
|
||||
"message": {
|
||||
"type": "string"
|
||||
},
|
||||
"param": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"title": "BatchError"
|
||||
}
|
||||
},
|
||||
"object": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"title": "Errors"
|
||||
},
|
||||
"expired_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"expires_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"failed_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"finalizing_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"in_progress_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"model": {
|
||||
"type": "string"
|
||||
},
|
||||
"output_file_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"request_counts": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"completed": {
|
||||
"type": "integer"
|
||||
},
|
||||
"failed": {
|
||||
"type": "integer"
|
||||
},
|
||||
"total": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"completed",
|
||||
"failed",
|
||||
"total"
|
||||
],
|
||||
"title": "BatchRequestCounts"
|
||||
},
|
||||
"usage": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"input_tokens": {
|
||||
"type": "integer"
|
||||
},
|
||||
"input_tokens_details": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"cached_tokens": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"cached_tokens"
|
||||
],
|
||||
"title": "InputTokensDetails"
|
||||
},
|
||||
"output_tokens": {
|
||||
"type": "integer"
|
||||
},
|
||||
"output_tokens_details": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"reasoning_tokens": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"reasoning_tokens"
|
||||
],
|
||||
"title": "OutputTokensDetails"
|
||||
},
|
||||
"total_tokens": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"input_tokens",
|
||||
"input_tokens_details",
|
||||
"output_tokens",
|
||||
"output_tokens_details",
|
||||
"total_tokens"
|
||||
],
|
||||
"title": "BatchUsage"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"id",
|
||||
"completion_window",
|
||||
"created_at",
|
||||
"endpoint",
|
||||
"input_file_id",
|
||||
"object",
|
||||
"status"
|
||||
],
|
||||
"title": "Batch"
|
||||
}
|
||||
},
|
||||
"first_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"last_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"has_more": {
|
||||
"type": "boolean",
|
||||
"default": false
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"object",
|
||||
"data",
|
||||
"has_more"
|
||||
],
|
||||
"title": "ListBatchesResponse",
|
||||
"description": "Response containing a list of batch objects."
|
||||
},
|
||||
"CreateBatchRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"input_file_id": {
|
||||
"type": "string",
|
||||
"description": "The ID of an uploaded file containing requests for the batch."
|
||||
},
|
||||
"endpoint": {
|
||||
"type": "string",
|
||||
"description": "The endpoint to be used for all requests in the batch."
|
||||
},
|
||||
"completion_window": {
|
||||
"type": "string",
|
||||
"const": "24h",
|
||||
"description": "The time window within which the batch should be processed."
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "string"
|
||||
},
|
||||
"description": "Optional metadata for the batch."
|
||||
},
|
||||
"idempotency_key": {
|
||||
"type": "string",
|
||||
"description": "Optional idempotency key. When provided, enables idempotent behavior."
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"input_file_id",
|
||||
"endpoint",
|
||||
"completion_window"
|
||||
],
|
||||
"title": "CreateBatchRequest"
|
||||
},
|
||||
"Batch": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string"
|
||||
},
|
||||
"completion_window": {
|
||||
"type": "string"
|
||||
},
|
||||
"created_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"endpoint": {
|
||||
"type": "string"
|
||||
},
|
||||
"input_file_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"object": {
|
||||
"type": "string",
|
||||
"const": "batch"
|
||||
},
|
||||
"status": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
"validating",
|
||||
"failed",
|
||||
"in_progress",
|
||||
"finalizing",
|
||||
"completed",
|
||||
"expired",
|
||||
"cancelling",
|
||||
"cancelled"
|
||||
]
|
||||
},
|
||||
"cancelled_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"cancelling_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"completed_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"error_file_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"errors": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"data": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"code": {
|
||||
"type": "string"
|
||||
},
|
||||
"line": {
|
||||
"type": "integer"
|
||||
},
|
||||
"message": {
|
||||
"type": "string"
|
||||
},
|
||||
"param": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"title": "BatchError"
|
||||
}
|
||||
},
|
||||
"object": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"title": "Errors"
|
||||
},
|
||||
"expired_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"expires_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"failed_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"finalizing_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"in_progress_at": {
|
||||
"type": "integer"
|
||||
},
|
||||
"metadata": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"type": "string"
|
||||
}
|
||||
},
|
||||
"model": {
|
||||
"type": "string"
|
||||
},
|
||||
"output_file_id": {
|
||||
"type": "string"
|
||||
},
|
||||
"request_counts": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"completed": {
|
||||
"type": "integer"
|
||||
},
|
||||
"failed": {
|
||||
"type": "integer"
|
||||
},
|
||||
"total": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"completed",
|
||||
"failed",
|
||||
"total"
|
||||
],
|
||||
"title": "BatchRequestCounts"
|
||||
},
|
||||
"usage": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"input_tokens": {
|
||||
"type": "integer"
|
||||
},
|
||||
"input_tokens_details": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"cached_tokens": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"cached_tokens"
|
||||
],
|
||||
"title": "InputTokensDetails"
|
||||
},
|
||||
"output_tokens": {
|
||||
"type": "integer"
|
||||
},
|
||||
"output_tokens_details": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"reasoning_tokens": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"reasoning_tokens"
|
||||
],
|
||||
"title": "OutputTokensDetails"
|
||||
},
|
||||
"total_tokens": {
|
||||
"type": "integer"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"input_tokens",
|
||||
"input_tokens_details",
|
||||
"output_tokens",
|
||||
"output_tokens_details",
|
||||
"total_tokens"
|
||||
],
|
||||
"title": "BatchUsage"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"id",
|
||||
"completion_window",
|
||||
"created_at",
|
||||
"endpoint",
|
||||
"input_file_id",
|
||||
"object",
|
||||
"status"
|
||||
],
|
||||
"title": "Batch"
|
||||
},
|
||||
"Order": {
|
||||
"type": "string",
|
||||
"enum": [
|
||||
|
|
@@ -5696,16 +6328,53 @@
          },
          {
            "$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage"
+          },
+          {
+            "$ref": "#/components/schemas/OpenAIResponseInputMessageContentFile"
          }
        ],
        "discriminator": {
          "propertyName": "type",
          "mapping": {
            "input_text": "#/components/schemas/OpenAIResponseInputMessageContentText",
-            "input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage"
+            "input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage",
+            "input_file": "#/components/schemas/OpenAIResponseInputMessageContentFile"
          }
        }
      },
+      "OpenAIResponseInputMessageContentFile": {
+        "type": "object",
+        "properties": {
+          "type": {
+            "type": "string",
+            "const": "input_file",
+            "default": "input_file",
+            "description": "The type of the input item. Always `input_file`."
+          },
+          "file_data": {
+            "type": "string",
+            "description": "The data of the file to be sent to the model."
+          },
+          "file_id": {
+            "type": "string",
+            "description": "(Optional) The ID of the file to be sent to the model."
+          },
+          "file_url": {
+            "type": "string",
+            "description": "The URL of the file to be sent to the model."
+          },
+          "filename": {
+            "type": "string",
+            "description": "The name of the file to be sent to the model."
+          }
+        },
+        "additionalProperties": false,
+        "required": [
+          "type"
+        ],
+        "title": "OpenAIResponseInputMessageContentFile",
+        "description": "File content for input messages in OpenAI response format."
+      },
      "OpenAIResponseInputMessageContentImage": {
        "type": "object",
        "properties": {
@@ -5733,6 +6402,10 @@
            "default": "input_image",
            "description": "Content type identifier, always \"input_image\""
          },
+          "file_id": {
+            "type": "string",
+            "description": "(Optional) The ID of the file to be sent to the model."
+          },
          "image_url": {
            "type": "string",
            "description": "(Optional) URL of the image content"
@@ -7305,29 +7978,14 @@
|
|||
"OpenAIResponseInput": {
|
||||
"oneOf": [
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall"
|
||||
"$ref": "#/components/schemas/OpenAIResponseOutput"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIResponseInputFunctionToolCallOutput"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIResponseMCPApprovalRequest"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIResponseMCPApprovalResponse"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPCall"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools"
|
||||
},
|
||||
{
|
||||
"$ref": "#/components/schemas/OpenAIResponseMessage"
|
||||
}
|
||||
|
|
@@ -7536,6 +8194,10 @@
          "type": "string",
          "description": "(Optional) ID of the previous response in a conversation"
        },
+        "prompt": {
+          "$ref": "#/components/schemas/OpenAIResponsePrompt",
+          "description": "(Optional) Reference to a prompt template and its variables."
+        },
        "status": {
          "type": "string",
          "description": "Current status of the response generation"
@@ -7631,6 +8293,32 @@
|
|||
}
|
||||
}
|
||||
},
|
||||
"OpenAIResponsePrompt": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"id": {
|
||||
"type": "string",
|
||||
"description": "Unique identifier of the prompt template"
|
||||
},
|
||||
"variables": {
|
||||
"type": "object",
|
||||
"additionalProperties": {
|
||||
"$ref": "#/components/schemas/OpenAIResponseInputMessageContent"
|
||||
},
|
||||
"description": "Dictionary of variable names to OpenAIResponseInputMessageContent structure for template substitution. The substitution values can either be strings, or other Response input types like images or files."
|
||||
},
|
||||
"version": {
|
||||
"type": "string",
|
||||
"description": "Version number of the prompt to use (defaults to latest if not specified)"
|
||||
}
|
||||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"id"
|
||||
],
|
||||
"title": "OpenAIResponsePrompt",
|
||||
"description": "OpenAI compatible Prompt object that is used in OpenAI responses."
|
||||
},
|
||||
"OpenAIResponseText": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
|
@@ -8001,6 +8689,10 @@
          "type": "string",
          "description": "The underlying LLM used for completions."
        },
+        "prompt": {
+          "$ref": "#/components/schemas/OpenAIResponsePrompt",
+          "description": "(Optional) Prompt object with ID, version, and variables."
+        },
        "instructions": {
          "type": "string"
        },
@@ -8089,6 +8781,10 @@
          "type": "string",
          "description": "(Optional) ID of the previous response in a conversation"
        },
+        "prompt": {
+          "$ref": "#/components/schemas/OpenAIResponsePrompt",
+          "description": "(Optional) Reference to a prompt template and its variables."
+        },
        "status": {
          "type": "string",
          "description": "Current status of the response generation"
@@ -11427,7 +12123,7 @@
          },
          "description": "List of documents to index in the RAG system"
        },
-        "vector_db_id": {
+        "vector_store_id": {
          "type": "string",
          "description": "ID of the vector database to store the document embeddings"
        },
@@ -11439,7 +12135,7 @@
        "additionalProperties": false,
        "required": [
          "documents",
-          "vector_db_id",
+          "vector_store_id",
          "chunk_size_in_tokens"
        ],
        "title": "InsertRequest"
@@ -11630,7 +12326,7 @@
          "$ref": "#/components/schemas/InterleavedContent",
          "description": "The query content to search for in the indexed documents"
        },
-        "vector_db_ids": {
+        "vector_store_ids": {
          "type": "array",
          "items": {
            "type": "string"
@@ -11645,7 +12341,7 @@
        "additionalProperties": false,
        "required": [
          "content",
-          "vector_db_ids"
+          "vector_store_ids"
        ],
        "title": "QueryRequest"
      },
@@ -11833,6 +12529,10 @@
          "$ref": "#/components/schemas/InterleavedContent",
          "description": "The content of the chunk, which can be interleaved text, images, or other types."
        },
+        "chunk_id": {
+          "type": "string",
+          "description": "Unique identifier for the chunk. Must be provided explicitly."
+        },
        "metadata": {
          "type": "object",
          "additionalProperties": {
@@ -11866,10 +12566,6 @@
          },
          "description": "Optional embedding for the chunk. If not provided, it will be computed later."
        },
-        "stored_chunk_id": {
-          "type": "string",
-          "description": "The chunk ID that is stored in the vector database. Used for backend functionality."
-        },
        "chunk_metadata": {
          "$ref": "#/components/schemas/ChunkMetadata",
          "description": "Metadata for the chunk that will NOT be used in the context during inference. The `chunk_metadata` is required backend functionality."
@@ -11878,6 +12574,7 @@
        "additionalProperties": false,
        "required": [
          "content",
+          "chunk_id",
          "metadata"
        ],
        "title": "Chunk",
@ -11938,7 +12635,7 @@
|
|||
"InsertChunksRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"vector_db_id": {
|
||||
"vector_store_id": {
|
||||
"type": "string",
|
||||
"description": "The identifier of the vector database to insert the chunks into."
|
||||
},
|
||||
|
|
@ -11956,7 +12653,7 @@
|
|||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"vector_db_id",
|
||||
"vector_store_id",
|
||||
"chunks"
|
||||
],
|
||||
"title": "InsertChunksRequest"
|
||||
|
|
@ -11964,7 +12661,7 @@
|
|||
"QueryChunksRequest": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"vector_db_id": {
|
||||
"vector_store_id": {
|
||||
"type": "string",
|
||||
"description": "The identifier of the vector database to query."
|
||||
},
|
||||
|
|
@ -12001,7 +12698,7 @@
|
|||
},
|
||||
"additionalProperties": false,
|
||||
"required": [
|
||||
"vector_db_id",
|
||||
"vector_store_id",
|
||||
"query"
|
||||
],
|
||||
"title": "QueryChunksRequest"
|
||||
|
|
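The hunks above are a mechanical rename: every `vector_db_id`/`vector_db_ids` field in the RAG and vector-IO request schemas becomes `vector_store_id`/`vector_store_ids`, including in the `required` lists, so existing request bodies need updating. A sketch of the new shape (IDs and contents invented for illustration):

```python
# Request bodies after the rename; the old keys were vector_db_id / vector_db_ids.
insert_chunks_body = {
    "vector_store_id": "vs_demo",      # was: vector_db_id
    "chunks": [],                       # unchanged; see the Chunk sketch further down
}

rag_query_body = {
    "content": "What providers support hybrid search?",
    "vector_store_ids": ["vs_demo"],   # was: vector_db_ids
}
```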
@@ -13224,6 +13921,11 @@
      "description": "APIs for creating and interacting with agentic systems.\n\n## Responses API\n\nThe Responses API provides OpenAI-compatible functionality with enhanced capabilities for dynamic, stateful interactions.\n\n> **✅ STABLE**: This API is production-ready with backward compatibility guarantees. Recommended for production applications.\n\n### ✅ Supported Tools\n\nThe Responses API supports the following tool types:\n\n- **`web_search`**: Search the web for current information and real-time data\n- **`file_search`**: Search through uploaded files and vector stores\n  - Supports dynamic `vector_store_ids` per call\n  - Compatible with OpenAI file search patterns\n- **`function`**: Call custom functions with JSON schema validation\n- **`mcp_tool`**: Model Context Protocol integration\n\n### ✅ Supported Fields & Features\n\n**Core Capabilities:**\n- **Dynamic Configuration**: Switch models, vector stores, and tools per request without pre-configuration\n- **Conversation Branching**: Use `previous_response_id` to branch conversations and explore different paths\n- **Rich Annotations**: Automatic file citations, URL citations, and container file citations\n- **Status Tracking**: Monitor tool call execution status and handle failures gracefully\n\n### 🚧 Work in Progress\n\n- Full real-time response streaming support\n- `tool_choice` parameter\n- `max_tool_calls` parameter\n- Built-in tools (code interpreter, containers API)\n- Safety & guardrails\n- `reasoning` capabilities\n- `service_tier`\n- `logprobs`\n- `max_output_tokens`\n- `metadata` handling\n- `instructions`\n- `incomplete_details`\n- `background`",
      "x-displayName": "Agents"
    },
    {
      "name": "Batches",
      "description": "The API is designed to allow use of openai client libraries for seamless integration.\n\nThis API provides the following extensions:\n - idempotent batch creation\n\nNote: This API is currently under active development and may undergo changes.",
      "x-displayName": "The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale."
    },
    {
      "name": "Conversations",
      "description": "Protocol for conversation management operations.",
@@ -13297,6 +13999,7 @@
      "name": "Operations",
      "tags": [
        "Agents",
        "Batches",
        "Conversations",
        "Files",
        "Inference",
579
docs/static/llama-stack-spec.yaml
vendored
@@ -12,6 +12,141 @@ info:
servers:
  - url: http://any-hosted-llama-stack.com
paths:
  /v1/batches:
    get:
      responses:
        '200':
          description: A list of batch objects.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ListBatchesResponse'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Batches
      summary: List all batches for the current user.
      description: List all batches for the current user.
      parameters:
        - name: after
          in: query
          description: >-
            A cursor for pagination; returns batches after this batch ID.
          required: false
          schema:
            type: string
        - name: limit
          in: query
          description: >-
            Number of batches to return (default 20, max 100).
          required: true
          schema:
            type: integer
      deprecated: false
    post:
      responses:
        '200':
          description: The created batch object.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Batch'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Batches
      summary: >-
        Create a new batch for processing multiple API requests.
      description: >-
        Create a new batch for processing multiple API requests.
      parameters: []
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateBatchRequest'
        required: true
      deprecated: false
  /v1/batches/{batch_id}:
    get:
      responses:
        '200':
          description: The batch object.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Batch'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Batches
      summary: >-
        Retrieve information about a specific batch.
      description: >-
        Retrieve information about a specific batch.
      parameters:
        - name: batch_id
          in: path
          description: The ID of the batch to retrieve.
          required: true
          schema:
            type: string
      deprecated: false
  /v1/batches/{batch_id}/cancel:
    post:
      responses:
        '200':
          description: The updated batch object.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Batch'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Batches
      summary: Cancel a batch that is in progress.
      description: Cancel a batch that is in progress.
      parameters:
        - name: batch_id
          in: path
          description: The ID of the batch to cancel.
          required: true
          schema:
            type: string
      deprecated: false
  /v1/chat/completions:
    get:
      responses:
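Since these paths mirror the OpenAI Batches API, the stock `openai` Python client should be able to drive them once pointed at a Llama Stack server. A minimal sketch, assuming a locally running stack and a previously uploaded input file (the base URL, file ID, and metadata are illustrative):

```python
# Sketch: create a batch against the new /v1/batches endpoints and poll it.
# The file ID is assumed to come from a prior upload via the Files API.
import time
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

batch = client.batches.create(
    input_file_id="file-abc123",
    endpoint="/v1/chat/completions",
    completion_window="24h",          # the only value CreateBatchRequest allows
    metadata={"run": "nightly-eval"},
)

# Poll until the batch leaves the non-terminal states from the status enum.
while batch.status in ("validating", "in_progress", "finalizing"):
    time.sleep(30)
    batch = client.batches.retrieve(batch.id)

print(batch.status, batch.request_counts)
# A stuck batch can be stopped with: client.batches.cancel(batch.id)
```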
@@ -2999,6 +3134,331 @@ components:
      title: Error
      description: >-
        Error response from the API. Roughly follows RFC 7807.
    ListBatchesResponse:
      type: object
      properties:
        object:
          type: string
          const: list
          default: list
        data:
          type: array
          items:
            type: object
            properties:
              id:
                type: string
              completion_window:
                type: string
              created_at:
                type: integer
              endpoint:
                type: string
              input_file_id:
                type: string
              object:
                type: string
                const: batch
              status:
                type: string
                enum:
                  - validating
                  - failed
                  - in_progress
                  - finalizing
                  - completed
                  - expired
                  - cancelling
                  - cancelled
              cancelled_at:
                type: integer
              cancelling_at:
                type: integer
              completed_at:
                type: integer
              error_file_id:
                type: string
              errors:
                type: object
                properties:
                  data:
                    type: array
                    items:
                      type: object
                      properties:
                        code:
                          type: string
                        line:
                          type: integer
                        message:
                          type: string
                        param:
                          type: string
                      additionalProperties: false
                      title: BatchError
                  object:
                    type: string
                additionalProperties: false
                title: Errors
              expired_at:
                type: integer
              expires_at:
                type: integer
              failed_at:
                type: integer
              finalizing_at:
                type: integer
              in_progress_at:
                type: integer
              metadata:
                type: object
                additionalProperties:
                  type: string
              model:
                type: string
              output_file_id:
                type: string
              request_counts:
                type: object
                properties:
                  completed:
                    type: integer
                  failed:
                    type: integer
                  total:
                    type: integer
                additionalProperties: false
                required:
                  - completed
                  - failed
                  - total
                title: BatchRequestCounts
              usage:
                type: object
                properties:
                  input_tokens:
                    type: integer
                  input_tokens_details:
                    type: object
                    properties:
                      cached_tokens:
                        type: integer
                    additionalProperties: false
                    required:
                      - cached_tokens
                    title: InputTokensDetails
                  output_tokens:
                    type: integer
                  output_tokens_details:
                    type: object
                    properties:
                      reasoning_tokens:
                        type: integer
                    additionalProperties: false
                    required:
                      - reasoning_tokens
                    title: OutputTokensDetails
                  total_tokens:
                    type: integer
                additionalProperties: false
                required:
                  - input_tokens
                  - input_tokens_details
                  - output_tokens
                  - output_tokens_details
                  - total_tokens
                title: BatchUsage
            additionalProperties: false
            required:
              - id
              - completion_window
              - created_at
              - endpoint
              - input_file_id
              - object
              - status
            title: Batch
        first_id:
          type: string
        last_id:
          type: string
        has_more:
          type: boolean
          default: false
      additionalProperties: false
      required:
        - object
        - data
        - has_more
      title: ListBatchesResponse
      description: >-
        Response containing a list of batch objects.
    CreateBatchRequest:
      type: object
      properties:
        input_file_id:
          type: string
          description: >-
            The ID of an uploaded file containing requests for the batch.
        endpoint:
          type: string
          description: >-
            The endpoint to be used for all requests in the batch.
        completion_window:
          type: string
          const: 24h
          description: >-
            The time window within which the batch should be processed.
        metadata:
          type: object
          additionalProperties:
            type: string
          description: Optional metadata for the batch.
        idempotency_key:
          type: string
          description: >-
            Optional idempotency key. When provided, enables idempotent behavior.
      additionalProperties: false
      required:
        - input_file_id
        - endpoint
        - completion_window
      title: CreateBatchRequest
    Batch:
      type: object
      properties:
        id:
          type: string
        completion_window:
          type: string
        created_at:
          type: integer
        endpoint:
          type: string
        input_file_id:
          type: string
        object:
          type: string
          const: batch
        status:
          type: string
          enum:
            - validating
            - failed
            - in_progress
            - finalizing
            - completed
            - expired
            - cancelling
            - cancelled
        cancelled_at:
          type: integer
        cancelling_at:
          type: integer
        completed_at:
          type: integer
        error_file_id:
          type: string
        errors:
          type: object
          properties:
            data:
              type: array
              items:
                type: object
                properties:
                  code:
                    type: string
                  line:
                    type: integer
                  message:
                    type: string
                  param:
                    type: string
                additionalProperties: false
                title: BatchError
            object:
              type: string
          additionalProperties: false
          title: Errors
        expired_at:
          type: integer
        expires_at:
          type: integer
        failed_at:
          type: integer
        finalizing_at:
          type: integer
        in_progress_at:
          type: integer
        metadata:
          type: object
          additionalProperties:
            type: string
        model:
          type: string
        output_file_id:
          type: string
        request_counts:
          type: object
          properties:
            completed:
              type: integer
            failed:
              type: integer
            total:
              type: integer
          additionalProperties: false
          required:
            - completed
            - failed
            - total
          title: BatchRequestCounts
        usage:
          type: object
          properties:
            input_tokens:
              type: integer
            input_tokens_details:
              type: object
              properties:
                cached_tokens:
                  type: integer
              additionalProperties: false
              required:
                - cached_tokens
              title: InputTokensDetails
            output_tokens:
              type: integer
            output_tokens_details:
              type: object
              properties:
                reasoning_tokens:
                  type: integer
              additionalProperties: false
              required:
                - reasoning_tokens
              title: OutputTokensDetails
            total_tokens:
              type: integer
          additionalProperties: false
          required:
            - input_tokens
            - input_tokens_details
            - output_tokens
            - output_tokens_details
            - total_tokens
          title: BatchUsage
      additionalProperties: false
      required:
        - id
        - completion_window
        - created_at
        - endpoint
        - input_file_id
        - object
        - status
      title: Batch
    Order:
      type: string
      enum:
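`CreateBatchRequest` expects `input_file_id` to point at an uploaded JSONL file of requests. The per-line format is not defined in this diff; assuming it follows the OpenAI batch input convention (one `custom_id`/`method`/`url`/`body` object per line), building such a file might look like:

```python
# Sketch of assembling a JSONL batch input file; the per-line shape is an
# assumption based on the OpenAI batch convention, not defined in this spec.
import json

questions = ["What is RAG?", "What is a vector store?"]
with open("batch_input.jsonl", "w") as f:
    for i, q in enumerate(questions):
        f.write(json.dumps({
            "custom_id": f"req-{i}",
            "method": "POST",
            "url": "/v1/chat/completions",
            "body": {
                "model": "llama3.2:3b",
                "messages": [{"role": "user", "content": q}],
            },
        }) + "\n")
```

The `idempotency_key` property is the Llama Stack extension called out in the Batches tag description: resubmitting the same create request with the same key should return the existing batch rather than enqueueing a duplicate.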
@@ -4261,11 +4721,44 @@ components:
      oneOf:
        - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
        - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage'
        - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile'
      discriminator:
        propertyName: type
        mapping:
          input_text: '#/components/schemas/OpenAIResponseInputMessageContentText'
          input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage'
          input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile'
    OpenAIResponseInputMessageContentFile:
      type: object
      properties:
        type:
          type: string
          const: input_file
          default: input_file
          description: >-
            The type of the input item. Always `input_file`.
        file_data:
          type: string
          description: >-
            The data of the file to be sent to the model.
        file_id:
          type: string
          description: >-
            (Optional) The ID of the file to be sent to the model.
        file_url:
          type: string
          description: >-
            The URL of the file to be sent to the model.
        filename:
          type: string
          description: >-
            The name of the file to be sent to the model.
      additionalProperties: false
      required:
        - type
      title: OpenAIResponseInputMessageContentFile
      description: >-
        File content for input messages in OpenAI response format.
    OpenAIResponseInputMessageContentImage:
      type: object
      properties:
@@ -4286,6 +4779,10 @@ components:
          default: input_image
          description: >-
            Content type identifier, always "input_image"
        file_id:
          type: string
          description: >-
            (Optional) The ID of the file to be sent to the model.
        image_url:
          type: string
          description: (Optional) URL of the image content
@@ -5522,14 +6019,9 @@ components:
        Error details for failed OpenAI response requests.
    OpenAIResponseInput:
      oneOf:
        - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
        - $ref: '#/components/schemas/OpenAIResponseOutput'
        - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
        - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
        - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
        - $ref: '#/components/schemas/OpenAIResponseMessage'
    OpenAIResponseInputToolFileSearch:
      type: object
@@ -5685,6 +6177,10 @@ components:
          type: string
          description: >-
            (Optional) ID of the previous response in a conversation
        prompt:
          $ref: '#/components/schemas/OpenAIResponsePrompt'
          description: >-
            (Optional) Reference to a prompt template and its variables.
        status:
          type: string
          description: >-
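With `OpenAIResponseInputMessageContentFile` added to the content union and its `input_file` discriminator entry, a user message can now attach a file alongside text. A hypothetical content list under the new schema (the file ID is invented, and in practice one of `file_id`, `file_url`, or `file_data` plus `filename` would be supplied):

```python
# Hypothetical message content using the new input_file variant.
content = [
    {"type": "input_text", "text": "Summarize the attached report."},
    {"type": "input_file", "file_id": "file-xyz789"},
]
```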
@@ -5758,6 +6254,30 @@ components:
          mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
          mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
          mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
    OpenAIResponsePrompt:
      type: object
      properties:
        id:
          type: string
          description: Unique identifier of the prompt template
        variables:
          type: object
          additionalProperties:
            $ref: '#/components/schemas/OpenAIResponseInputMessageContent'
          description: >-
            Dictionary of variable names to OpenAIResponseInputMessageContent structure
            for template substitution. The substitution values can either be strings,
            or other Response input types like images or files.
        version:
          type: string
          description: >-
            Version number of the prompt to use (defaults to latest if not specified)
      additionalProperties: false
      required:
        - id
      title: OpenAIResponsePrompt
      description: >-
        OpenAI compatible Prompt object that is used in OpenAI responses.
    OpenAIResponseText:
      type: object
      properties:
@@ -6015,6 +6535,10 @@ components:
        model:
          type: string
          description: The underlying LLM used for completions.
        prompt:
          $ref: '#/components/schemas/OpenAIResponsePrompt'
          description: >-
            (Optional) Prompt object with ID, version, and variables.
        instructions:
          type: string
        previous_response_id:
@@ -6092,6 +6616,10 @@ components:
          type: string
          description: >-
            (Optional) ID of the previous response in a conversation
        prompt:
          $ref: '#/components/schemas/OpenAIResponsePrompt'
          description: >-
            (Optional) Reference to a prompt template and its variables.
        status:
          type: string
          description: >-
@@ -8654,7 +9182,7 @@ components:
            $ref: '#/components/schemas/RAGDocument'
          description: >-
            List of documents to index in the RAG system
        vector_db_id:
        vector_store_id:
          type: string
          description: >-
            ID of the vector database to store the document embeddings
@@ -8665,7 +9193,7 @@ components:
      additionalProperties: false
      required:
        - documents
        - vector_db_id
        - vector_store_id
        - chunk_size_in_tokens
      title: InsertRequest
    DefaultRAGQueryGeneratorConfig:
@@ -8836,7 +9364,7 @@ components:
          $ref: '#/components/schemas/InterleavedContent'
          description: >-
            The query content to search for in the indexed documents
        vector_db_ids:
        vector_store_ids:
          type: array
          items:
            type: string
@@ -8849,7 +9377,7 @@ components:
      additionalProperties: false
      required:
        - content
        - vector_db_ids
        - vector_store_ids
      title: QueryRequest
    RAGQueryResult:
      type: object
@@ -8977,6 +9505,10 @@ components:
          description: >-
            The content of the chunk, which can be interleaved text, images, or other
            types.
        chunk_id:
          type: string
          description: >-
            Unique identifier for the chunk. Must be provided explicitly.
        metadata:
          type: object
          additionalProperties:
@@ -8997,10 +9529,6 @@ components:
          description: >-
            Optional embedding for the chunk. If not provided, it will be computed
            later.
        stored_chunk_id:
          type: string
          description: >-
            The chunk ID that is stored in the vector database. Used for backend functionality.
        chunk_metadata:
          $ref: '#/components/schemas/ChunkMetadata'
          description: >-
@@ -9009,6 +9537,7 @@ components:
      additionalProperties: false
      required:
        - content
        - chunk_id
        - metadata
      title: Chunk
      description: >-
@@ -9073,7 +9602,7 @@ components:
    InsertChunksRequest:
      type: object
      properties:
        vector_db_id:
        vector_store_id:
          type: string
          description: >-
            The identifier of the vector database to insert the chunks into.
@@ -9092,13 +9621,13 @@ components:
          description: The time to live of the chunks.
      additionalProperties: false
      required:
        - vector_db_id
        - vector_store_id
        - chunks
      title: InsertChunksRequest
    QueryChunksRequest:
      type: object
      properties:
        vector_db_id:
        vector_store_id:
          type: string
          description: >-
            The identifier of the vector database to query.
@@ -9118,7 +9647,7 @@ components:
      description: The parameters of the query.
      additionalProperties: false
      required:
        - vector_db_id
        - vector_store_id
        - query
      title: QueryChunksRequest
    QueryChunksResponse:
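The Chunk changes are worth flagging for RAG callers: `chunk_id` joins `content` and `metadata` in the `required` list and must now be supplied by the caller, while the separate `stored_chunk_id` property goes away. A chunk under the revised schema (values illustrative only):

```python
# Chunk payload under the revised schema used by insert_chunks.
chunk = {
    "chunk_id": "doc1-0000",  # now required; no longer derived server-side
    "content": "Elasticsearch can back a Llama Stack vector store.",
    "metadata": {"document_id": "doc1"},
    # "embedding" omitted: the schema says it will be computed later if absent
}
```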
@@ -10075,6 +10604,19 @@ tags:

      - `background`
    x-displayName: Agents
  - name: Batches
    description: >-
      The API is designed to allow use of openai client libraries for seamless integration.


      This API provides the following extensions:
       - idempotent batch creation

      Note: This API is currently under active development and may undergo changes.
    x-displayName: >-
      The Batches API enables efficient processing of multiple requests in a single
      operation, particularly useful for processing large datasets, batch evaluation
      workflows, and cost-effective inference at scale.
  - name: Conversations
    description: >-
      Protocol for conversation management operations.
@@ -10137,6 +10679,7 @@ x-tagGroups:
  - name: Operations
    tags:
      - Agents
      - Batches
      - Conversations
      - Files
      - Inference
766
docs/static/stainless-llama-stack-spec.html
vendored
@@ -40,6 +40,193 @@
      }
    ],
    "paths": {
      "/v1/batches": {
        "get": {
          "responses": {
            "200": {
              "description": "A list of batch objects.",
              "content": {
                "application/json": {
                  "schema": {
                    "$ref": "#/components/schemas/ListBatchesResponse"
                  }
                }
              }
            },
            "400": {
              "$ref": "#/components/responses/BadRequest400"
            },
            "429": {
              "$ref": "#/components/responses/TooManyRequests429"
            },
            "500": {
              "$ref": "#/components/responses/InternalServerError500"
            },
            "default": {
              "$ref": "#/components/responses/DefaultError"
            }
          },
          "tags": [
            "Batches"
          ],
          "summary": "List all batches for the current user.",
          "description": "List all batches for the current user.",
          "parameters": [
            {
              "name": "after",
              "in": "query",
              "description": "A cursor for pagination; returns batches after this batch ID.",
              "required": false,
              "schema": {
                "type": "string"
              }
            },
            {
              "name": "limit",
              "in": "query",
              "description": "Number of batches to return (default 20, max 100).",
              "required": true,
              "schema": {
                "type": "integer"
              }
            }
          ],
          "deprecated": false
        },
        "post": {
          "responses": {
            "200": {
              "description": "The created batch object.",
              "content": {
                "application/json": {
                  "schema": {
                    "$ref": "#/components/schemas/Batch"
                  }
                }
              }
            },
            "400": {
              "$ref": "#/components/responses/BadRequest400"
            },
            "429": {
              "$ref": "#/components/responses/TooManyRequests429"
            },
            "500": {
              "$ref": "#/components/responses/InternalServerError500"
            },
            "default": {
              "$ref": "#/components/responses/DefaultError"
            }
          },
          "tags": [
            "Batches"
          ],
          "summary": "Create a new batch for processing multiple API requests.",
          "description": "Create a new batch for processing multiple API requests.",
          "parameters": [],
          "requestBody": {
            "content": {
              "application/json": {
                "schema": {
                  "$ref": "#/components/schemas/CreateBatchRequest"
                }
              }
            },
            "required": true
          },
          "deprecated": false
        }
      },
      "/v1/batches/{batch_id}": {
        "get": {
          "responses": {
            "200": {
              "description": "The batch object.",
              "content": {
                "application/json": {
                  "schema": {
                    "$ref": "#/components/schemas/Batch"
                  }
                }
              }
            },
            "400": {
              "$ref": "#/components/responses/BadRequest400"
            },
            "429": {
              "$ref": "#/components/responses/TooManyRequests429"
            },
            "500": {
              "$ref": "#/components/responses/InternalServerError500"
            },
            "default": {
              "$ref": "#/components/responses/DefaultError"
            }
          },
          "tags": [
            "Batches"
          ],
          "summary": "Retrieve information about a specific batch.",
          "description": "Retrieve information about a specific batch.",
          "parameters": [
            {
              "name": "batch_id",
              "in": "path",
              "description": "The ID of the batch to retrieve.",
              "required": true,
              "schema": {
                "type": "string"
              }
            }
          ],
          "deprecated": false
        }
      },
      "/v1/batches/{batch_id}/cancel": {
        "post": {
          "responses": {
            "200": {
              "description": "The updated batch object.",
              "content": {
                "application/json": {
                  "schema": {
                    "$ref": "#/components/schemas/Batch"
                  }
                }
              }
            },
            "400": {
              "$ref": "#/components/responses/BadRequest400"
            },
            "429": {
              "$ref": "#/components/responses/TooManyRequests429"
            },
            "500": {
              "$ref": "#/components/responses/InternalServerError500"
            },
            "default": {
              "$ref": "#/components/responses/DefaultError"
            }
          },
          "tags": [
            "Batches"
          ],
          "summary": "Cancel a batch that is in progress.",
          "description": "Cancel a batch that is in progress.",
          "parameters": [
            {
              "name": "batch_id",
              "in": "path",
              "description": "The ID of the batch to cancel.",
              "required": true,
              "schema": {
                "type": "string"
              }
            }
          ],
          "deprecated": false
        }
      },
      "/v1/chat/completions": {
        "get": {
          "responses": {
@@ -5677,6 +5864,451 @@
      "title": "Error",
      "description": "Error response from the API. Roughly follows RFC 7807."
    },
    "ListBatchesResponse": {
      "type": "object",
      "properties": {
        "object": {
          "type": "string",
          "const": "list",
          "default": "list"
        },
        "data": {
          "type": "array",
          "items": {
            "type": "object",
            "properties": {
              "id": {
                "type": "string"
              },
              "completion_window": {
                "type": "string"
              },
              "created_at": {
                "type": "integer"
              },
              "endpoint": {
                "type": "string"
              },
              "input_file_id": {
                "type": "string"
              },
              "object": {
                "type": "string",
                "const": "batch"
              },
              "status": {
                "type": "string",
                "enum": [
                  "validating",
                  "failed",
                  "in_progress",
                  "finalizing",
                  "completed",
                  "expired",
                  "cancelling",
                  "cancelled"
                ]
              },
              "cancelled_at": {
                "type": "integer"
              },
              "cancelling_at": {
                "type": "integer"
              },
              "completed_at": {
                "type": "integer"
              },
              "error_file_id": {
                "type": "string"
              },
              "errors": {
                "type": "object",
                "properties": {
                  "data": {
                    "type": "array",
                    "items": {
                      "type": "object",
                      "properties": {
                        "code": {
                          "type": "string"
                        },
                        "line": {
                          "type": "integer"
                        },
                        "message": {
                          "type": "string"
                        },
                        "param": {
                          "type": "string"
                        }
                      },
                      "additionalProperties": false,
                      "title": "BatchError"
                    }
                  },
                  "object": {
                    "type": "string"
                  }
                },
                "additionalProperties": false,
                "title": "Errors"
              },
              "expired_at": {
                "type": "integer"
              },
              "expires_at": {
                "type": "integer"
              },
              "failed_at": {
                "type": "integer"
              },
              "finalizing_at": {
                "type": "integer"
              },
              "in_progress_at": {
                "type": "integer"
              },
              "metadata": {
                "type": "object",
                "additionalProperties": {
                  "type": "string"
                }
              },
              "model": {
                "type": "string"
              },
              "output_file_id": {
                "type": "string"
              },
              "request_counts": {
                "type": "object",
                "properties": {
                  "completed": {
                    "type": "integer"
                  },
                  "failed": {
                    "type": "integer"
                  },
                  "total": {
                    "type": "integer"
                  }
                },
                "additionalProperties": false,
                "required": [
                  "completed",
                  "failed",
                  "total"
                ],
                "title": "BatchRequestCounts"
              },
              "usage": {
                "type": "object",
                "properties": {
                  "input_tokens": {
                    "type": "integer"
                  },
                  "input_tokens_details": {
                    "type": "object",
                    "properties": {
                      "cached_tokens": {
                        "type": "integer"
                      }
                    },
                    "additionalProperties": false,
                    "required": [
                      "cached_tokens"
                    ],
                    "title": "InputTokensDetails"
                  },
                  "output_tokens": {
                    "type": "integer"
                  },
                  "output_tokens_details": {
                    "type": "object",
                    "properties": {
                      "reasoning_tokens": {
                        "type": "integer"
                      }
                    },
                    "additionalProperties": false,
                    "required": [
                      "reasoning_tokens"
                    ],
                    "title": "OutputTokensDetails"
                  },
                  "total_tokens": {
                    "type": "integer"
                  }
                },
                "additionalProperties": false,
                "required": [
                  "input_tokens",
                  "input_tokens_details",
                  "output_tokens",
                  "output_tokens_details",
                  "total_tokens"
                ],
                "title": "BatchUsage"
              }
            },
            "additionalProperties": false,
            "required": [
              "id",
              "completion_window",
              "created_at",
              "endpoint",
              "input_file_id",
              "object",
              "status"
            ],
            "title": "Batch"
          }
        },
        "first_id": {
          "type": "string"
        },
        "last_id": {
          "type": "string"
        },
        "has_more": {
          "type": "boolean",
          "default": false
        }
      },
      "additionalProperties": false,
      "required": [
        "object",
        "data",
        "has_more"
      ],
      "title": "ListBatchesResponse",
      "description": "Response containing a list of batch objects."
    },
    "CreateBatchRequest": {
      "type": "object",
      "properties": {
        "input_file_id": {
          "type": "string",
          "description": "The ID of an uploaded file containing requests for the batch."
        },
        "endpoint": {
          "type": "string",
          "description": "The endpoint to be used for all requests in the batch."
        },
        "completion_window": {
          "type": "string",
          "const": "24h",
          "description": "The time window within which the batch should be processed."
        },
        "metadata": {
          "type": "object",
          "additionalProperties": {
            "type": "string"
          },
          "description": "Optional metadata for the batch."
        },
        "idempotency_key": {
          "type": "string",
          "description": "Optional idempotency key. When provided, enables idempotent behavior."
        }
      },
      "additionalProperties": false,
      "required": [
        "input_file_id",
        "endpoint",
        "completion_window"
      ],
      "title": "CreateBatchRequest"
    },
    "Batch": {
      "type": "object",
      "properties": {
        "id": {
          "type": "string"
        },
        "completion_window": {
          "type": "string"
        },
        "created_at": {
          "type": "integer"
        },
        "endpoint": {
          "type": "string"
        },
        "input_file_id": {
          "type": "string"
        },
        "object": {
          "type": "string",
          "const": "batch"
        },
        "status": {
          "type": "string",
          "enum": [
            "validating",
            "failed",
            "in_progress",
            "finalizing",
            "completed",
            "expired",
            "cancelling",
            "cancelled"
          ]
        },
        "cancelled_at": {
          "type": "integer"
        },
        "cancelling_at": {
          "type": "integer"
        },
        "completed_at": {
          "type": "integer"
        },
        "error_file_id": {
          "type": "string"
        },
        "errors": {
          "type": "object",
          "properties": {
            "data": {
              "type": "array",
              "items": {
                "type": "object",
                "properties": {
                  "code": {
                    "type": "string"
                  },
                  "line": {
                    "type": "integer"
                  },
                  "message": {
                    "type": "string"
                  },
                  "param": {
                    "type": "string"
                  }
                },
                "additionalProperties": false,
                "title": "BatchError"
              }
            },
            "object": {
              "type": "string"
            }
          },
          "additionalProperties": false,
          "title": "Errors"
        },
        "expired_at": {
          "type": "integer"
        },
        "expires_at": {
          "type": "integer"
        },
        "failed_at": {
          "type": "integer"
        },
        "finalizing_at": {
          "type": "integer"
        },
        "in_progress_at": {
          "type": "integer"
        },
        "metadata": {
          "type": "object",
          "additionalProperties": {
            "type": "string"
          }
        },
        "model": {
          "type": "string"
        },
        "output_file_id": {
          "type": "string"
        },
        "request_counts": {
          "type": "object",
          "properties": {
            "completed": {
              "type": "integer"
            },
            "failed": {
              "type": "integer"
            },
            "total": {
              "type": "integer"
            }
          },
          "additionalProperties": false,
          "required": [
            "completed",
            "failed",
            "total"
          ],
          "title": "BatchRequestCounts"
        },
        "usage": {
          "type": "object",
          "properties": {
            "input_tokens": {
              "type": "integer"
            },
            "input_tokens_details": {
              "type": "object",
              "properties": {
                "cached_tokens": {
                  "type": "integer"
                }
              },
              "additionalProperties": false,
              "required": [
                "cached_tokens"
              ],
              "title": "InputTokensDetails"
            },
            "output_tokens": {
              "type": "integer"
            },
            "output_tokens_details": {
              "type": "object",
              "properties": {
                "reasoning_tokens": {
                  "type": "integer"
                }
              },
              "additionalProperties": false,
              "required": [
                "reasoning_tokens"
              ],
              "title": "OutputTokensDetails"
            },
            "total_tokens": {
              "type": "integer"
            }
          },
          "additionalProperties": false,
          "required": [
            "input_tokens",
            "input_tokens_details",
            "output_tokens",
            "output_tokens_details",
            "total_tokens"
          ],
          "title": "BatchUsage"
        }
      },
      "additionalProperties": false,
      "required": [
        "id",
        "completion_window",
        "created_at",
        "endpoint",
        "input_file_id",
        "object",
        "status"
      ],
      "title": "Batch"
    },
    "Order": {
      "type": "string",
      "enum": [
@@ -7368,16 +8000,53 @@
        },
        {
          "$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage"
        },
        {
          "$ref": "#/components/schemas/OpenAIResponseInputMessageContentFile"
        }
      ],
      "discriminator": {
        "propertyName": "type",
        "mapping": {
          "input_text": "#/components/schemas/OpenAIResponseInputMessageContentText",
          "input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage"
          "input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage",
          "input_file": "#/components/schemas/OpenAIResponseInputMessageContentFile"
        }
      }
    },
    "OpenAIResponseInputMessageContentFile": {
      "type": "object",
      "properties": {
        "type": {
          "type": "string",
          "const": "input_file",
          "default": "input_file",
          "description": "The type of the input item. Always `input_file`."
        },
        "file_data": {
          "type": "string",
          "description": "The data of the file to be sent to the model."
        },
        "file_id": {
          "type": "string",
          "description": "(Optional) The ID of the file to be sent to the model."
        },
        "file_url": {
          "type": "string",
          "description": "The URL of the file to be sent to the model."
        },
        "filename": {
          "type": "string",
          "description": "The name of the file to be sent to the model."
        }
      },
      "additionalProperties": false,
      "required": [
        "type"
      ],
      "title": "OpenAIResponseInputMessageContentFile",
      "description": "File content for input messages in OpenAI response format."
    },
    "OpenAIResponseInputMessageContentImage": {
      "type": "object",
      "properties": {
@@ -7405,6 +8074,10 @@
          "default": "input_image",
          "description": "Content type identifier, always \"input_image\""
        },
        "file_id": {
          "type": "string",
          "description": "(Optional) The ID of the file to be sent to the model."
        },
        "image_url": {
          "type": "string",
          "description": "(Optional) URL of the image content"
@@ -8977,29 +9650,14 @@
    "OpenAIResponseInput": {
      "oneOf": [
        {
          "$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
        },
        {
          "$ref": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall"
        },
        {
          "$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall"
          "$ref": "#/components/schemas/OpenAIResponseOutput"
        },
        {
          "$ref": "#/components/schemas/OpenAIResponseInputFunctionToolCallOutput"
        },
        {
          "$ref": "#/components/schemas/OpenAIResponseMCPApprovalRequest"
        },
        {
          "$ref": "#/components/schemas/OpenAIResponseMCPApprovalResponse"
        },
        {
          "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPCall"
        },
        {
          "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools"
        },
        {
          "$ref": "#/components/schemas/OpenAIResponseMessage"
        }
@@ -9208,6 +9866,10 @@
          "type": "string",
          "description": "(Optional) ID of the previous response in a conversation"
        },
        "prompt": {
          "$ref": "#/components/schemas/OpenAIResponsePrompt",
          "description": "(Optional) Reference to a prompt template and its variables."
        },
        "status": {
          "type": "string",
          "description": "Current status of the response generation"
@@ -9303,6 +9965,32 @@
          }
        }
      },
    "OpenAIResponsePrompt": {
      "type": "object",
      "properties": {
        "id": {
          "type": "string",
          "description": "Unique identifier of the prompt template"
        },
        "variables": {
          "type": "object",
          "additionalProperties": {
            "$ref": "#/components/schemas/OpenAIResponseInputMessageContent"
          },
          "description": "Dictionary of variable names to OpenAIResponseInputMessageContent structure for template substitution. The substitution values can either be strings, or other Response input types like images or files."
        },
        "version": {
          "type": "string",
          "description": "Version number of the prompt to use (defaults to latest if not specified)"
        }
      },
      "additionalProperties": false,
      "required": [
        "id"
      ],
      "title": "OpenAIResponsePrompt",
      "description": "OpenAI compatible Prompt object that is used in OpenAI responses."
    },
    "OpenAIResponseText": {
      "type": "object",
      "properties": {
@@ -9673,6 +10361,10 @@
          "type": "string",
          "description": "The underlying LLM used for completions."
        },
        "prompt": {
          "$ref": "#/components/schemas/OpenAIResponsePrompt",
          "description": "(Optional) Prompt object with ID, version, and variables."
        },
        "instructions": {
          "type": "string"
        },
@@ -9761,6 +10453,10 @@
          "type": "string",
          "description": "(Optional) ID of the previous response in a conversation"
        },
        "prompt": {
          "$ref": "#/components/schemas/OpenAIResponsePrompt",
          "description": "(Optional) Reference to a prompt template and its variables."
        },
        "status": {
          "type": "string",
          "description": "Current status of the response generation"
@@ -13099,7 +13795,7 @@
          },
          "description": "List of documents to index in the RAG system"
        },
        "vector_db_id": {
        "vector_store_id": {
          "type": "string",
          "description": "ID of the vector database to store the document embeddings"
        },
@@ -13111,7 +13807,7 @@
      "additionalProperties": false,
      "required": [
        "documents",
        "vector_db_id",
        "vector_store_id",
        "chunk_size_in_tokens"
      ],
      "title": "InsertRequest"
@@ -13302,7 +13998,7 @@
          "$ref": "#/components/schemas/InterleavedContent",
          "description": "The query content to search for in the indexed documents"
        },
        "vector_db_ids": {
        "vector_store_ids": {
          "type": "array",
          "items": {
            "type": "string"
@@ -13317,7 +14013,7 @@
      "additionalProperties": false,
      "required": [
        "content",
        "vector_db_ids"
        "vector_store_ids"
      ],
      "title": "QueryRequest"
    },
@@ -13505,6 +14201,10 @@
          "$ref": "#/components/schemas/InterleavedContent",
          "description": "The content of the chunk, which can be interleaved text, images, or other types."
        },
        "chunk_id": {
          "type": "string",
          "description": "Unique identifier for the chunk. Must be provided explicitly."
        },
        "metadata": {
          "type": "object",
          "additionalProperties": {
@@ -13538,10 +14238,6 @@
          },
          "description": "Optional embedding for the chunk. If not provided, it will be computed later."
        },
        "stored_chunk_id": {
          "type": "string",
          "description": "The chunk ID that is stored in the vector database. Used for backend functionality."
        },
        "chunk_metadata": {
          "$ref": "#/components/schemas/ChunkMetadata",
          "description": "Metadata for the chunk that will NOT be used in the context during inference. The `chunk_metadata` is required backend functionality."
@@ -13550,6 +14246,7 @@
      "additionalProperties": false,
      "required": [
        "content",
        "chunk_id",
        "metadata"
      ],
      "title": "Chunk",
@@ -13610,7 +14307,7 @@
    "InsertChunksRequest": {
      "type": "object",
      "properties": {
        "vector_db_id": {
        "vector_store_id": {
          "type": "string",
          "description": "The identifier of the vector database to insert the chunks into."
        },
@@ -13628,7 +14325,7 @@
      },
      "additionalProperties": false,
      "required": [
        "vector_db_id",
        "vector_store_id",
        "chunks"
      ],
      "title": "InsertChunksRequest"
@@ -13636,7 +14333,7 @@
    "QueryChunksRequest": {
      "type": "object",
      "properties": {
        "vector_db_id": {
        "vector_store_id": {
          "type": "string",
          "description": "The identifier of the vector database to query."
        },
@@ -13673,7 +14370,7 @@
      },
      "additionalProperties": false,
      "required": [
        "vector_db_id",
        "vector_store_id",
        "query"
      ],
      "title": "QueryChunksRequest"
@@ -15452,7 +16149,6 @@
        },
        "max_tokens": {
          "type": "integer",
          "default": 0,
          "description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
        },
        "repetition_penalty": {
@@ -15735,7 +16431,7 @@
          "const": "memory_retrieval",
          "default": "memory_retrieval"
        },
        "vector_db_ids": {
        "vector_store_ids": {
          "type": "string",
          "description": "The IDs of the vector databases to retrieve context from."
        },
@@ -15749,7 +16445,7 @@
        "turn_id",
        "step_id",
        "step_type",
        "vector_db_ids",
        "vector_store_ids",
        "inserted_context"
      ],
      "title": "MemoryRetrievalStep",
@@ -17897,6 +18593,11 @@
      "description": "APIs for creating and interacting with agentic systems.",
      "x-displayName": "Agents"
    },
    {
      "name": "Batches",
      "description": "The API is designed to allow use of openai client libraries for seamless integration.\n\nThis API provides the following extensions:\n - idempotent batch creation\n\nNote: This API is currently under active development and may undergo changes.",
      "x-displayName": "The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale."
    },
    {
      "name": "Benchmarks",
      "description": ""
@@ -17991,6 +18692,7 @@
      "name": "Operations",
      "tags": [
        "Agents",
        "Batches",
        "Benchmarks",
        "Conversations",
        "DatasetIO",
584
docs/static/stainless-llama-stack-spec.yaml
vendored
@@ -15,6 +15,141 @@ info:
servers:
  - url: http://any-hosted-llama-stack.com
paths:
  /v1/batches:
    get:
      responses:
        '200':
          description: A list of batch objects.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ListBatchesResponse'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Batches
      summary: List all batches for the current user.
      description: List all batches for the current user.
      parameters:
        - name: after
          in: query
          description: >-
            A cursor for pagination; returns batches after this batch ID.
          required: false
          schema:
            type: string
        - name: limit
          in: query
          description: >-
            Number of batches to return (default 20, max 100).
          required: true
          schema:
            type: integer
      deprecated: false
    post:
      responses:
        '200':
          description: The created batch object.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Batch'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Batches
      summary: >-
        Create a new batch for processing multiple API requests.
      description: >-
        Create a new batch for processing multiple API requests.
      parameters: []
      requestBody:
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/CreateBatchRequest'
        required: true
      deprecated: false
  /v1/batches/{batch_id}:
    get:
      responses:
        '200':
          description: The batch object.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Batch'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Batches
      summary: >-
        Retrieve information about a specific batch.
      description: >-
        Retrieve information about a specific batch.
      parameters:
        - name: batch_id
          in: path
          description: The ID of the batch to retrieve.
          required: true
          schema:
            type: string
      deprecated: false
  /v1/batches/{batch_id}/cancel:
    post:
      responses:
        '200':
          description: The updated batch object.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/Batch'
        '400':
          $ref: '#/components/responses/BadRequest400'
        '429':
          $ref: >-
            #/components/responses/TooManyRequests429
        '500':
          $ref: >-
            #/components/responses/InternalServerError500
        default:
          $ref: '#/components/responses/DefaultError'
      tags:
        - Batches
      summary: Cancel a batch that is in progress.
      description: Cancel a batch that is in progress.
      parameters:
        - name: batch_id
          in: path
          description: The ID of the batch to cancel.
          required: true
          schema:
            type: string
      deprecated: false
  /v1/chat/completions:
    get:
      responses:
@ -4212,6 +4347,331 @@ components:
|
|||
title: Error
|
||||
description: >-
|
||||
Error response from the API. Roughly follows RFC 7807.
|
||||
ListBatchesResponse:
|
||||
type: object
|
||||
properties:
|
||||
object:
|
||||
type: string
|
||||
const: list
|
||||
default: list
|
||||
data:
|
||||
type: array
|
||||
items:
|
||||
type: object
|
||||
properties:
|
||||
id:
|
||||
type: string
|
||||
completion_window:
                type: string
              created_at:
                type: integer
              endpoint:
                type: string
              input_file_id:
                type: string
              object:
                type: string
                const: batch
              status:
                type: string
                enum:
                  - validating
                  - failed
                  - in_progress
                  - finalizing
                  - completed
                  - expired
                  - cancelling
                  - cancelled
              cancelled_at:
                type: integer
              cancelling_at:
                type: integer
              completed_at:
                type: integer
              error_file_id:
                type: string
              errors:
                type: object
                properties:
                  data:
                    type: array
                    items:
                      type: object
                      properties:
                        code:
                          type: string
                        line:
                          type: integer
                        message:
                          type: string
                        param:
                          type: string
                      additionalProperties: false
                      title: BatchError
                  object:
                    type: string
                additionalProperties: false
                title: Errors
              expired_at:
                type: integer
              expires_at:
                type: integer
              failed_at:
                type: integer
              finalizing_at:
                type: integer
              in_progress_at:
                type: integer
              metadata:
                type: object
                additionalProperties:
                  type: string
              model:
                type: string
              output_file_id:
                type: string
              request_counts:
                type: object
                properties:
                  completed:
                    type: integer
                  failed:
                    type: integer
                  total:
                    type: integer
                additionalProperties: false
                required:
                  - completed
                  - failed
                  - total
                title: BatchRequestCounts
              usage:
                type: object
                properties:
                  input_tokens:
                    type: integer
                  input_tokens_details:
                    type: object
                    properties:
                      cached_tokens:
                        type: integer
                    additionalProperties: false
                    required:
                      - cached_tokens
                    title: InputTokensDetails
                  output_tokens:
                    type: integer
                  output_tokens_details:
                    type: object
                    properties:
                      reasoning_tokens:
                        type: integer
                    additionalProperties: false
                    required:
                      - reasoning_tokens
                    title: OutputTokensDetails
                  total_tokens:
                    type: integer
                additionalProperties: false
                required:
                  - input_tokens
                  - input_tokens_details
                  - output_tokens
                  - output_tokens_details
                  - total_tokens
                title: BatchUsage
            additionalProperties: false
            required:
              - id
              - completion_window
              - created_at
              - endpoint
              - input_file_id
              - object
              - status
            title: Batch
        first_id:
          type: string
        last_id:
          type: string
        has_more:
          type: boolean
          default: false
      additionalProperties: false
      required:
        - object
        - data
        - has_more
      title: ListBatchesResponse
      description: >-
        Response containing a list of batch objects.
    CreateBatchRequest:
      type: object
      properties:
        input_file_id:
          type: string
          description: >-
            The ID of an uploaded file containing requests for the batch.
        endpoint:
          type: string
          description: >-
            The endpoint to be used for all requests in the batch.
        completion_window:
          type: string
          const: 24h
          description: >-
            The time window within which the batch should be processed.
        metadata:
          type: object
          additionalProperties:
            type: string
          description: Optional metadata for the batch.
        idempotency_key:
          type: string
          description: >-
            Optional idempotency key. When provided, enables idempotent behavior.
      additionalProperties: false
      required:
        - input_file_id
        - endpoint
        - completion_window
      title: CreateBatchRequest
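A minimal sketch of exercising the CreateBatchRequest shape above through an OpenAI-compatible client. The server URL, API key, and input file are assumptions; idempotency_key is the Llama Stack extension noted in the schema, so it is passed via extra_body rather than a named client argument.

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")  # assumed local server

# Upload a JSONL file of requests, then create the batch that references it.
batch_input = client.files.create(file=open("requests.jsonl", "rb"), purpose="batch")

batch = client.batches.create(
    input_file_id=batch_input.id,
    endpoint="/v1/chat/completions",
    completion_window="24h",  # the schema pins this to the const "24h"
    metadata={"job": "nightly-eval"},
    extra_body={"idempotency_key": "nightly-eval-2025-01-01"},  # Llama Stack extension
)
print(batch.id, batch.status)  # a new batch starts in "validating"

Re-sending the same request with the same idempotency_key should return the existing batch instead of creating a duplicate, which is the point of the extension.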
    Batch:
      type: object
      properties:
        id:
          type: string
        completion_window:
          type: string
        created_at:
          type: integer
        endpoint:
          type: string
        input_file_id:
          type: string
        object:
          type: string
          const: batch
        status:
          type: string
          enum:
            - validating
            - failed
            - in_progress
            - finalizing
            - completed
            - expired
            - cancelling
            - cancelled
        cancelled_at:
          type: integer
        cancelling_at:
          type: integer
        completed_at:
          type: integer
        error_file_id:
          type: string
        errors:
          type: object
          properties:
            data:
              type: array
              items:
                type: object
                properties:
                  code:
                    type: string
                  line:
                    type: integer
                  message:
                    type: string
                  param:
                    type: string
                additionalProperties: false
                title: BatchError
            object:
              type: string
          additionalProperties: false
          title: Errors
        expired_at:
          type: integer
        expires_at:
          type: integer
        failed_at:
          type: integer
        finalizing_at:
          type: integer
        in_progress_at:
          type: integer
        metadata:
          type: object
          additionalProperties:
            type: string
        model:
          type: string
        output_file_id:
          type: string
        request_counts:
          type: object
          properties:
            completed:
              type: integer
            failed:
              type: integer
            total:
              type: integer
          additionalProperties: false
          required:
            - completed
            - failed
            - total
          title: BatchRequestCounts
        usage:
          type: object
          properties:
            input_tokens:
              type: integer
            input_tokens_details:
              type: object
              properties:
                cached_tokens:
                  type: integer
              additionalProperties: false
              required:
                - cached_tokens
              title: InputTokensDetails
            output_tokens:
              type: integer
            output_tokens_details:
              type: object
              properties:
                reasoning_tokens:
                  type: integer
              additionalProperties: false
              required:
                - reasoning_tokens
              title: OutputTokensDetails
            total_tokens:
              type: integer
          additionalProperties: false
          required:
            - input_tokens
            - input_tokens_details
            - output_tokens
            - output_tokens_details
            - total_tokens
          title: BatchUsage
      additionalProperties: false
      required:
        - id
        - completion_window
        - created_at
        - endpoint
        - input_file_id
        - object
        - status
      title: Batch
    Order:
      type: string
      enum:
@@ -5474,11 +5934,44 @@ components:
      oneOf:
        - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
        - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage'
        - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile'
      discriminator:
        propertyName: type
        mapping:
          input_text: '#/components/schemas/OpenAIResponseInputMessageContentText'
          input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage'
          input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile'
    OpenAIResponseInputMessageContentFile:
      type: object
      properties:
        type:
          type: string
          const: input_file
          default: input_file
          description: >-
            The type of the input item. Always `input_file`.
        file_data:
          type: string
          description: >-
            The data of the file to be sent to the model.
        file_id:
          type: string
          description: >-
            (Optional) The ID of the file to be sent to the model.
        file_url:
          type: string
          description: >-
            The URL of the file to be sent to the model.
        filename:
          type: string
          description: >-
            The name of the file to be sent to the model.
      additionalProperties: false
      required:
        - type
      title: OpenAIResponseInputMessageContentFile
      description: >-
        File content for input messages in OpenAI response format.
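A short sketch of the new input_file content part in a Responses call. The server URL, model id, and file id are assumptions; per the schema, file_url or inline file_data could be used in place of file_id.

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")  # assumed local server

response = client.responses.create(
    model="llama3.2:3b",  # assumed model id
    input=[
        {
            "role": "user",
            "content": [
                {"type": "input_text", "text": "Summarize the attached file."},
                {"type": "input_file", "file_id": "file-abc123"},  # hypothetical file id
            ],
        }
    ],
)
print(response.output_text)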
    OpenAIResponseInputMessageContentImage:
      type: object
      properties:

@@ -5499,6 +5992,10 @@ components:
          default: input_image
          description: >-
            Content type identifier, always "input_image"
        file_id:
          type: string
          description: >-
            (Optional) The ID of the file to be sent to the model.
        image_url:
          type: string
          description: (Optional) URL of the image content

@@ -6735,14 +7232,9 @@ components:
        Error details for failed OpenAI response requests.
    OpenAIResponseInput:
      oneOf:
        - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
        - $ref: '#/components/schemas/OpenAIResponseOutput'
        - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
        - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
        - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
        - $ref: '#/components/schemas/OpenAIResponseMessage'
    OpenAIResponseInputToolFileSearch:
      type: object

@@ -6898,6 +7390,10 @@ components:
          type: string
          description: >-
            (Optional) ID of the previous response in a conversation
        prompt:
          $ref: '#/components/schemas/OpenAIResponsePrompt'
          description: >-
            (Optional) Reference to a prompt template and its variables.
        status:
          type: string
          description: >-
@@ -6971,6 +7467,30 @@ components:
          mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
          mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
          mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
    OpenAIResponsePrompt:
      type: object
      properties:
        id:
          type: string
          description: Unique identifier of the prompt template
        variables:
          type: object
          additionalProperties:
            $ref: '#/components/schemas/OpenAIResponseInputMessageContent'
          description: >-
            Dictionary of variable names to OpenAIResponseInputMessageContent structure
            for template substitution. The substitution values can either be strings,
            or other Response input types like images or files.
        version:
          type: string
          description: >-
            Version number of the prompt to use (defaults to latest if not specified)
      additionalProperties: false
      required:
        - id
      title: OpenAIResponsePrompt
      description: >-
        OpenAI compatible Prompt object that is used in OpenAI responses.
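A hedged sketch of referencing a stored prompt template per the OpenAIResponsePrompt schema above. The server URL, model id, and prompt id are assumptions; variables take plain strings or typed content parts such as input_image.

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")  # assumed local server

response = client.responses.create(
    model="llama3.2:3b",  # assumed model id
    prompt={
        "id": "pmpt_123",            # hypothetical prompt template id
        "version": "2",              # defaults to latest when omitted
        "variables": {
            "customer_name": "Ada",  # plain-string substitution
        },
    },
)
print(response.output_text)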
    OpenAIResponseText:
      type: object
      properties:

@@ -7228,6 +7748,10 @@ components:
        model:
          type: string
          description: The underlying LLM used for completions.
        prompt:
          $ref: '#/components/schemas/OpenAIResponsePrompt'
          description: >-
            (Optional) Prompt object with ID, version, and variables.
        instructions:
          type: string
        previous_response_id:

@@ -7305,6 +7829,10 @@ components:
          type: string
          description: >-
            (Optional) ID of the previous response in a conversation
        prompt:
          $ref: '#/components/schemas/OpenAIResponsePrompt'
          description: >-
            (Optional) Reference to a prompt template and its variables.
        status:
          type: string
          description: >-
@@ -9867,7 +10395,7 @@ components:
            $ref: '#/components/schemas/RAGDocument'
          description: >-
            List of documents to index in the RAG system
        vector_db_id:
        vector_store_id:
          type: string
          description: >-
            ID of the vector database to store the document embeddings

@@ -9878,7 +10406,7 @@ components:
      additionalProperties: false
      required:
        - documents
        - vector_db_id
        - vector_store_id
        - chunk_size_in_tokens
      title: InsertRequest
    DefaultRAGQueryGeneratorConfig:

@@ -10049,7 +10577,7 @@ components:
          $ref: '#/components/schemas/InterleavedContent'
          description: >-
            The query content to search for in the indexed documents
        vector_db_ids:
        vector_store_ids:
          type: array
          items:
            type: string

@@ -10062,7 +10590,7 @@ components:
      additionalProperties: false
      required:
        - content
        - vector_db_ids
        - vector_store_ids
      title: QueryRequest
    RAGQueryResult:
      type: object

@@ -10190,6 +10718,10 @@ components:
          description: >-
            The content of the chunk, which can be interleaved text, images, or other
            types.
        chunk_id:
          type: string
          description: >-
            Unique identifier for the chunk. Must be provided explicitly.
        metadata:
          type: object
          additionalProperties:

@@ -10210,10 +10742,6 @@ components:
          description: >-
            Optional embedding for the chunk. If not provided, it will be computed
            later.
        stored_chunk_id:
          type: string
          description: >-
            The chunk ID that is stored in the vector database. Used for backend functionality.
        chunk_metadata:
          $ref: '#/components/schemas/ChunkMetadata'
          description: >-

@@ -10222,6 +10750,7 @@ components:
      additionalProperties: false
      required:
        - content
        - chunk_id
        - metadata
      title: Chunk
      description: >-

@@ -10286,7 +10815,7 @@ components:
    InsertChunksRequest:
      type: object
      properties:
        vector_db_id:
        vector_store_id:
          type: string
          description: >-
            The identifier of the vector database to insert the chunks into.

@@ -10305,13 +10834,13 @@ components:
          description: The time to live of the chunks.
      additionalProperties: false
      required:
        - vector_db_id
        - vector_store_id
        - chunks
      title: InsertChunksRequest
    QueryChunksRequest:
      type: object
      properties:
        vector_db_id:
        vector_store_id:
          type: string
          description: >-
            The identifier of the vector database to query.

@@ -10331,7 +10860,7 @@ components:
          description: The parameters of the query.
      additionalProperties: false
      required:
        - vector_db_id
        - vector_store_id
        - query
      title: QueryChunksRequest
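To make the vector_db_id -> vector_store_id rename concrete, a hedged client-side sketch; the method and field names follow llama-stack-client conventions and should be treated as assumptions, as is the chunk shape (chunk_id is now required per the Chunk schema above).

from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")  # assumed local server

# The request keyword tracks the schema rename: vector_db_id -> vector_store_id.
client.vector_io.insert(
    vector_store_id="my-store",
    chunks=[{"content": "hello world", "chunk_id": "chunk-0", "metadata": {}}],
)
res = client.vector_io.query(vector_store_id="my-store", query="hello")
print(res.chunks[0].content)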
    QueryChunksResponse:

@@ -11600,7 +12129,6 @@ components:
          description: The sampling strategy.
        max_tokens:
          type: integer
          default: 0
          description: >-
            The maximum number of tokens that can be generated in the completion.
            The token count of your prompt plus max_tokens cannot exceed the model's

@@ -11850,7 +12378,7 @@ components:
          description: Type of the step in an agent turn.
          const: memory_retrieval
          default: memory_retrieval
        vector_db_ids:
        vector_store_ids:
          type: string
          description: >-
            The IDs of the vector databases to retrieve context from.

@@ -11863,7 +12391,7 @@ components:
        - turn_id
        - step_id
        - step_type
        - vector_db_ids
        - vector_store_ids
        - inserted_context
      title: MemoryRetrievalStep
      description: >-
|
@@ -13460,6 +13988,19 @@ tags:
  description: >-
    APIs for creating and interacting with agentic systems.
  x-displayName: Agents
- name: Batches
  description: >-
    The Batches API enables efficient processing of multiple requests in a single
    operation, particularly useful for processing large datasets, batch evaluation
    workflows, and cost-effective inference at scale.

    The API is designed to allow use of openai client libraries for seamless integration.

    This API provides the following extensions:
    - idempotent batch creation

    Note: This API is currently under active development and may undergo changes.
  x-displayName: Batches
- name: Benchmarks
  description: ''
- name: Conversations

@@ -13534,6 +14075,7 @@ x-tagGroups:
  - name: Operations
    tags:
      - Agents
      - Batches
      - Benchmarks
      - Conversations
      - DatasetIO
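Closing out the Batches additions, a short sketch of consuming the paginated ListBatchesResponse shape with the same assumed OpenAI-compatible client; the SDK iterator follows first_id/last_id/has_more on its own.

from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")  # assumed local server

for b in client.batches.list(limit=20):  # pages via first_id/last_id/has_more
    print(b.id, b.status, b.request_counts)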
@@ -1,7 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from .telemetry import *
@@ -1,250 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

import os
import threading
from typing import Any

from opentelemetry import metrics, trace
from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.metrics import MeterProvider
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor
from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator

from llama_stack.apis.telemetry import (
    Event,
    MetricEvent,
    SpanEndPayload,
    SpanStartPayload,
    SpanStatus,
    StructuredLogEvent,
    UnstructuredLogEvent,
)
from llama_stack.apis.telemetry import (
    Telemetry as TelemetryBase,
)
from llama_stack.core.telemetry.tracing import ROOT_SPAN_MARKERS
from llama_stack.log import get_logger

_GLOBAL_STORAGE: dict[str, dict[str | int, Any]] = {
    "active_spans": {},
    "counters": {},
    "gauges": {},
    "up_down_counters": {},
}
_global_lock = threading.Lock()
_TRACER_PROVIDER = None

logger = get_logger(name=__name__, category="telemetry")


def is_tracing_enabled(tracer):
    with tracer.start_as_current_span("check_tracing") as span:
        return span.is_recording()


class Telemetry(TelemetryBase):
    def __init__(self) -> None:
        self.meter = None

        global _TRACER_PROVIDER
        # Initialize the correct span processor based on the provider state.
        # This is needed since once the span processor is set, it cannot be unset.
        # Recreating the telemetry adapter multiple times will result in duplicate span processors.
        # Since the library client can be recreated multiple times in a notebook,
        # the kernel will hold on to the span processor and cause duplicate spans to be written.
        if os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT"):
            if _TRACER_PROVIDER is None:
                provider = TracerProvider()
                trace.set_tracer_provider(provider)
                _TRACER_PROVIDER = provider

                # Use single OTLP endpoint for all telemetry signals.
                # Let OpenTelemetry SDK handle endpoint construction automatically:
                # the SDK reads OTEL_EXPORTER_OTLP_ENDPOINT and constructs appropriate URLs.
                # https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter
                span_exporter = OTLPSpanExporter()
                span_processor = BatchSpanProcessor(span_exporter)
                trace.get_tracer_provider().add_span_processor(span_processor)

                metric_reader = PeriodicExportingMetricReader(OTLPMetricExporter())
                metric_provider = MeterProvider(metric_readers=[metric_reader])
                metrics.set_meter_provider(metric_provider)
            self.is_otel_endpoint_set = True
        else:
            logger.warning("OTEL_EXPORTER_OTLP_ENDPOINT is not set, skipping telemetry")
            self.is_otel_endpoint_set = False

        self.meter = metrics.get_meter(__name__)
        self._lock = _global_lock

    async def initialize(self) -> None:
        pass

    async def shutdown(self) -> None:
        if self.is_otel_endpoint_set:
            trace.get_tracer_provider().force_flush()

    async def log_event(self, event: Event, ttl_seconds: int = 604800) -> None:
        if isinstance(event, UnstructuredLogEvent):
            self._log_unstructured(event, ttl_seconds)
        elif isinstance(event, MetricEvent):
            self._log_metric(event)
        elif isinstance(event, StructuredLogEvent):
            self._log_structured(event, ttl_seconds)
        else:
            raise ValueError(f"Unknown event type: {event}")

    def _log_unstructured(self, event: UnstructuredLogEvent, ttl_seconds: int) -> None:
        with self._lock:
            # Use global storage instead of instance storage
            span_id = int(event.span_id, 16)
            span = _GLOBAL_STORAGE["active_spans"].get(span_id)

            if span:
                timestamp_ns = int(event.timestamp.timestamp() * 1e9)
                span.add_event(
                    name=event.type.value,
                    attributes={
                        "message": event.message,
                        "severity": event.severity.value,
                        "__ttl__": ttl_seconds,
                        **(event.attributes or {}),
                    },
                    timestamp=timestamp_ns,
                )
            else:
                print(f"Warning: No active span found for span_id {span_id}. Dropping event: {event}")

    def _get_or_create_counter(self, name: str, unit: str) -> metrics.Counter:
        assert self.meter is not None
        if name not in _GLOBAL_STORAGE["counters"]:
            _GLOBAL_STORAGE["counters"][name] = self.meter.create_counter(
                name=name,
                unit=unit,
                description=f"Counter for {name}",
            )
        return _GLOBAL_STORAGE["counters"][name]

    def _get_or_create_gauge(self, name: str, unit: str) -> metrics.ObservableGauge:
        assert self.meter is not None
        if name not in _GLOBAL_STORAGE["gauges"]:
            _GLOBAL_STORAGE["gauges"][name] = self.meter.create_gauge(
                name=name,
                unit=unit,
                description=f"Gauge for {name}",
            )
        return _GLOBAL_STORAGE["gauges"][name]

    def _log_metric(self, event: MetricEvent) -> None:
        # Add metric as an event to the current span
        try:
            with self._lock:
                # Only try to add to span if we have a valid span_id
                if event.span_id:
                    try:
                        span_id = int(event.span_id, 16)
                        span = _GLOBAL_STORAGE["active_spans"].get(span_id)

                        if span:
                            timestamp_ns = int(event.timestamp.timestamp() * 1e9)
                            span.add_event(
                                name=f"metric.{event.metric}",
                                attributes={
                                    "value": event.value,
                                    "unit": event.unit,
                                    **(event.attributes or {}),
                                },
                                timestamp=timestamp_ns,
                            )
                    except (ValueError, KeyError):
                        # Invalid span_id or span not found, but we already logged to console above
                        pass
        except Exception:
            # Lock acquisition failed
            logger.debug("Failed to acquire lock to add metric to span")

        # Log to OpenTelemetry meter if available
        if self.meter is None:
            return
        if isinstance(event.value, int):
            counter = self._get_or_create_counter(event.metric, event.unit)
            counter.add(event.value, attributes=event.attributes)
        elif isinstance(event.value, float):
            up_down_counter = self._get_or_create_up_down_counter(event.metric, event.unit)
            up_down_counter.add(event.value, attributes=event.attributes)

    def _get_or_create_up_down_counter(self, name: str, unit: str) -> metrics.UpDownCounter:
        assert self.meter is not None
        if name not in _GLOBAL_STORAGE["up_down_counters"]:
            _GLOBAL_STORAGE["up_down_counters"][name] = self.meter.create_up_down_counter(
                name=name,
                unit=unit,
                description=f"UpDownCounter for {name}",
            )
        return _GLOBAL_STORAGE["up_down_counters"][name]

    def _log_structured(self, event: StructuredLogEvent, ttl_seconds: int) -> None:
        with self._lock:
            span_id = int(event.span_id, 16)
            tracer = trace.get_tracer(__name__)
            if event.attributes is None:
                event.attributes = {}
            event.attributes["__ttl__"] = ttl_seconds

            # Extract these W3C trace context attributes so they are not written to
            # underlying storage, as we just need them to propagate the trace context.
            traceparent = event.attributes.pop("traceparent", None)
            tracestate = event.attributes.pop("tracestate", None)
            if traceparent:
                # If we have a traceparent header value, we're not the root span.
                for root_attribute in ROOT_SPAN_MARKERS:
                    event.attributes.pop(root_attribute, None)

            if isinstance(event.payload, SpanStartPayload):
                # Check if span already exists to prevent duplicates
                if span_id in _GLOBAL_STORAGE["active_spans"]:
                    return

                context = None
                if event.payload.parent_span_id:
                    parent_span_id = int(event.payload.parent_span_id, 16)
                    parent_span = _GLOBAL_STORAGE["active_spans"].get(parent_span_id)
                    context = trace.set_span_in_context(parent_span)
                elif traceparent:
                    carrier = {
                        "traceparent": traceparent,
                        "tracestate": tracestate,
                    }
                    context = TraceContextTextMapPropagator().extract(carrier=carrier)

                span = tracer.start_span(
                    name=event.payload.name,
                    context=context,
                    attributes=event.attributes or {},
                )
                _GLOBAL_STORAGE["active_spans"][span_id] = span

            elif isinstance(event.payload, SpanEndPayload):
                span = _GLOBAL_STORAGE["active_spans"].get(span_id)
                if span:
                    if event.attributes:
                        span.set_attributes(event.attributes)

                    status = (
                        trace.Status(status_code=trace.StatusCode.OK)
                        if event.payload.status == SpanStatus.OK
                        else trace.Status(status_code=trace.StatusCode.ERROR)
                    )
                    span.set_status(status)
                    span.end()
                    _GLOBAL_STORAGE["active_spans"].pop(span_id, None)
            else:
                raise ValueError(f"Unknown structured log event: {event}")
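A minimal standalone sketch of the SDK behavior the comments in this deleted adapter relied on: with only OTEL_EXPORTER_OTLP_ENDPOINT set, the http exporters derive the per-signal URLs (.../v1/traces, .../v1/metrics) themselves. The collector address is an assumption.

import os

os.environ.setdefault("OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4318")  # assumed collector

from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor

provider = TracerProvider()
provider.add_span_processor(BatchSpanProcessor(OTLPSpanExporter()))  # no URL arguments needed
trace.set_tracer_provider(provider)

with trace.get_tracer("demo").start_as_current_span("hello"):
    pass  # the span is batched and exported to the collector configured above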
@@ -1,40 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from collections.abc import AsyncGenerator
from contextvars import ContextVar


def preserve_contexts_async_generator[T](
    gen: AsyncGenerator[T, None], context_vars: list[ContextVar]
) -> AsyncGenerator[T, None]:
    """
    Wraps an async generator to preserve context variables across iterations.
    This is needed because we start a new asyncio event loop for each streaming request,
    and we need to preserve the context across the event loop boundary.
    """
    # Capture initial context values
    initial_context_values = {context_var.name: context_var.get() for context_var in context_vars}

    async def wrapper() -> AsyncGenerator[T, None]:
        while True:
            try:
                # Restore context values before any await
                for context_var in context_vars:
                    context_var.set(initial_context_values[context_var.name])

                item = await gen.__anext__()

                # Update our tracked values with any changes made during this iteration
                for context_var in context_vars:
                    initial_context_values[context_var.name] = context_var.get()

                yield item

            except StopAsyncIteration:
                break

    return wrapper()
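A minimal usage sketch of the wrapper above (names are illustrative): a request-id ContextVar stays visible across the event-loop boundary of a streaming response.

import asyncio
from contextvars import ContextVar

request_id: ContextVar[str] = ContextVar("request_id", default="-")

async def stream_tokens():
    for tok in ["hello", "world"]:
        # Without the wrapper, a consumer iterating from a different task or
        # event loop would not see the value set before streaming started.
        yield f"{request_id.get()}:{tok}"

async def main():
    request_id.set("req-42")
    wrapped = preserve_contexts_async_generator(stream_tokens(), [request_id])
    async for item in wrapped:
        print(item)  # -> req-42:hello, req-42:world

asyncio.run(main())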
@@ -1,61 +0,0 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.


from llama_stack.log import get_logger
from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin

from . import NVIDIAConfig
from .utils import _is_nvidia_hosted

logger = get_logger(name=__name__, category="inference::nvidia")


class NVIDIAInferenceAdapter(OpenAIMixin):
    config: NVIDIAConfig

    """
    NVIDIA Inference Adapter for Llama Stack.
    """

    # source: https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/support-matrix.html
    embedding_model_metadata: dict[str, dict[str, int]] = {
        "nvidia/llama-3.2-nv-embedqa-1b-v2": {"embedding_dimension": 2048, "context_length": 8192},
        "nvidia/nv-embedqa-e5-v5": {"embedding_dimension": 512, "context_length": 1024},
        "nvidia/nv-embedqa-mistral-7b-v2": {"embedding_dimension": 512, "context_length": 4096},
        "snowflake/arctic-embed-l": {"embedding_dimension": 512, "context_length": 1024},
    }

    async def initialize(self) -> None:
        logger.info(f"Initializing NVIDIAInferenceAdapter({self.config.url})...")

        if _is_nvidia_hosted(self.config):
            if not self.config.auth_credential:
                raise RuntimeError(
                    "API key is required for hosted NVIDIA NIM. Either provide an API key or use a self-hosted NIM."
                )

    def get_api_key(self) -> str | None:
        """
        Get the API key for OpenAI mixin.

        :return: The NVIDIA API key, or None when no credential is configured
        """
        if self.config.auth_credential:
            return self.config.auth_credential.get_secret_value()

        if not _is_nvidia_hosted(self.config):
            return "NO KEY REQUIRED"

        return None

    def get_base_url(self) -> str:
        """
        Get the base URL for OpenAI mixin.

        :return: The NVIDIA API base URL
        """
        return f"{self.config.url}/v1" if self.config.append_api_version else self.config.url
204 pyproject.toml
@@ -31,7 +31,7 @@ dependencies = [
    "jinja2>=3.1.6",
    "jsonschema",
    "llama-stack-client>=0.3.0",
    "openai>=1.107", # for expires_after support
    "openai>=2.5.0",
    "prompt-toolkit",
    "python-dotenv",
    "pyjwt[crypto]>=2.10.0", # Pull crypto to support RS256 for jwt. Requires 2.10.0+ for ssl_context support.

@@ -67,17 +67,48 @@ dev = [
    "pytest-cov",
    "pytest-html",
    "pytest-json-report",
    "pytest-socket", # For blocking network access in unit tests
    "nbval", # For notebook testing
    "pytest-socket", # For blocking network access in unit tests
    "nbval", # For notebook testing
    "black",
    "ruff",
    "mypy",
    "pre-commit",
    "ruamel.yaml", # needed for openapi generator
]
# Type checking dependencies - includes type stubs and optional runtime dependencies
# needed for complete mypy coverage across all optional features
type_checking = [
    "types-requests",
    "types-setuptools",
    "pre-commit",
    "ruamel.yaml", # needed for openapi generator
    "types-jsonschema",
    "pandas-stubs",
    "types-psutil",
    "types-tqdm",
    "boto3-stubs[s3]",
    "streamlit",
    "streamlit-option-menu",
    "pandas",
    "anthropic",
    "databricks-sdk",
    "fairscale",
    "torchtune",
    "trl",
    "peft",
    "datasets",
    "together",
    "nest-asyncio",
    "pymongo",
    "torchvision",
    "sqlite-vec",
    "faiss-cpu",
    "lm-format-enforcer",
    "mcp",
    "ollama",
]
# These are the dependencies required for running unit tests.
unit = [
    "anthropic",
    "databricks-sdk",
    "sqlite-vec",
    "ollama",
    "aiosqlite",

@@ -151,7 +182,7 @@ llama = "llama_stack.cli.llama:main"
install-wheel-from-presigned = "llama_stack.cli.scripts.run:install_wheel_from_presigned"

[tool.setuptools.packages.find]
where = ["."]
where = ["src"]
include = ["llama_stack", "llama_stack.*"]

[[tool.uv.index]]

@@ -218,17 +249,17 @@ unfixable = [
# Ignore the following errors for the following files
[tool.ruff.lint.per-file-ignores]
"tests/**/*.py" = ["DTZ"] # Ignore datetime rules for tests
"llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py" = ["RUF001"]
"llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py" = [
"src/llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py" = ["RUF001"]
"src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py" = [
    "RUF001",
    "PLE2515",
]
"llama_stack/apis/**/__init__.py" = [
"src/llama_stack/apis/**/__init__.py" = [
    "F403",
] # Using import * is acceptable (or at least tolerated) in an __init__.py of a package API

[tool.mypy]
mypy_path = ["llama_stack"]
mypy_path = ["src"]
packages = ["llama_stack"]
plugins = ['pydantic.mypy']
disable_error_code = []

@@ -240,82 +271,91 @@ follow_imports = "silent"
# to exclude the entire directory.
exclude = [
    # As we fix more and more of these, we should remove them from the list
    "^llama_stack.core/build\\.py$",
    "^llama_stack.core/client\\.py$",
    "^llama_stack.core/request_headers\\.py$",
    "^llama_stack.core/routers/",
    "^llama_stack.core/routing_tables/",
    "^llama_stack.core/server/endpoints\\.py$",
    "^llama_stack.core/server/server\\.py$",
    "^llama_stack.core/stack\\.py$",
    "^llama_stack.core/store/registry\\.py$",
    "^llama_stack.core/utils/exec\\.py$",
    "^llama_stack.core/utils/prompt_for_config\\.py$",
    "^llama_stack/models/llama/llama3/interface\\.py$",
    "^llama_stack/models/llama/llama3/tokenizer\\.py$",
    "^llama_stack/models/llama/llama3/tool_utils\\.py$",
    "^llama_stack/providers/inline/agents/meta_reference/",
    "^llama_stack/providers/inline/datasetio/localfs/",
    "^llama_stack/providers/inline/eval/meta_reference/eval\\.py$",
    "^llama_stack/providers/inline/inference/meta_reference/inference\\.py$",
    "^llama_stack/models/llama/llama3/generation\\.py$",
    "^llama_stack/models/llama/llama3/multimodal/model\\.py$",
    "^llama_stack/models/llama/llama4/",
    "^llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers\\.py$",
    "^llama_stack/providers/inline/post_training/common/validator\\.py$",
    "^llama_stack/providers/inline/safety/code_scanner/",
    "^llama_stack/providers/inline/safety/llama_guard/",
    "^llama_stack/providers/inline/scoring/basic/",
    "^llama_stack/providers/inline/scoring/braintrust/",
    "^llama_stack/providers/inline/scoring/llm_as_judge/",
    "^llama_stack/providers/remote/agents/sample/",
    "^llama_stack/providers/remote/datasetio/huggingface/",
    "^llama_stack/providers/remote/datasetio/nvidia/",
    "^llama_stack/providers/remote/inference/bedrock/",
    "^llama_stack/providers/remote/inference/nvidia/",
    "^llama_stack/providers/remote/inference/passthrough/",
    "^llama_stack/providers/remote/inference/runpod/",
    "^llama_stack/providers/remote/inference/tgi/",
    "^llama_stack/providers/remote/inference/watsonx/",
    "^llama_stack/providers/remote/safety/bedrock/",
    "^llama_stack/providers/remote/safety/nvidia/",
    "^llama_stack/providers/remote/safety/sambanova/",
    "^llama_stack/providers/remote/safety/sample/",
    "^llama_stack/providers/remote/tool_runtime/bing_search/",
    "^llama_stack/providers/remote/tool_runtime/brave_search/",
    "^llama_stack/providers/remote/tool_runtime/model_context_protocol/",
    "^llama_stack/providers/remote/tool_runtime/tavily_search/",
    "^llama_stack/providers/remote/tool_runtime/wolfram_alpha/",
    "^llama_stack/providers/remote/post_training/nvidia/",
    "^llama_stack/providers/remote/vector_io/chroma/",
    "^llama_stack/providers/remote/vector_io/milvus/",
    "^llama_stack/providers/remote/vector_io/pgvector/",
    "^llama_stack/providers/remote/vector_io/qdrant/",
    "^llama_stack/providers/remote/vector_io/sample/",
    "^llama_stack/providers/remote/vector_io/weaviate/",
    "^llama_stack/providers/utils/bedrock/client\\.py$",
    "^llama_stack/providers/utils/bedrock/refreshable_boto_session\\.py$",
    "^llama_stack/providers/utils/inference/embedding_mixin\\.py$",
    "^llama_stack/providers/utils/inference/litellm_openai_mixin\\.py$",
    "^llama_stack/providers/utils/inference/model_registry\\.py$",
    "^llama_stack/providers/utils/inference/openai_compat\\.py$",
    "^llama_stack/providers/utils/inference/prompt_adapter\\.py$",
    "^llama_stack/providers/utils/kvstore/kvstore\\.py$",
    "^llama_stack/providers/utils/kvstore/postgres/postgres\\.py$",
    "^llama_stack/providers/utils/kvstore/redis/redis\\.py$",
    "^llama_stack/providers/utils/memory/vector_store\\.py$",
    "^llama_stack/providers/utils/scoring/aggregation_utils\\.py$",
    "^llama_stack/providers/utils/scoring/base_scoring_fn\\.py$",
    "^llama_stack/providers/utils/telemetry/dataset_mixin\\.py$",
    "^llama_stack/providers/utils/telemetry/trace_protocol\\.py$",
    "^llama_stack/providers/utils/telemetry/tracing\\.py$",
    "^llama_stack/strong_typing/auxiliary\\.py$",
    "^llama_stack/distributions/template\\.py$",
    "^src/llama_stack/core/build\\.py$",
    "^src/llama_stack/core/client\\.py$",
    "^src/llama_stack/core/request_headers\\.py$",
    "^src/llama_stack/core/routers/",
    "^src/llama_stack/core/routing_tables/",
    "^src/llama_stack/core/server/endpoints\\.py$",
    "^src/llama_stack/core/server/server\\.py$",
    "^src/llama_stack/core/stack\\.py$",
    "^src/llama_stack/core/store/registry\\.py$",
    "^src/llama_stack/core/utils/exec\\.py$",
    "^src/llama_stack/core/utils/prompt_for_config\\.py$",
    "^src/llama_stack/models/llama/llama3/interface\\.py$",
    "^src/llama_stack/models/llama/llama3/tokenizer\\.py$",
    "^src/llama_stack/models/llama/llama3/tool_utils\\.py$",
    "^src/llama_stack/providers/inline/datasetio/localfs/",
    "^src/llama_stack/providers/inline/eval/meta_reference/eval\\.py$",
    "^src/llama_stack/providers/inline/inference/meta_reference/inference\\.py$",
    "^src/llama_stack/models/llama/llama3/generation\\.py$",
    "^src/llama_stack/models/llama/llama3/multimodal/model\\.py$",
    "^src/llama_stack/models/llama/llama4/",
    "^src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers\\.py$",
    "^src/llama_stack/providers/inline/post_training/common/validator\\.py$",
    "^src/llama_stack/providers/inline/safety/code_scanner/",
    "^src/llama_stack/providers/inline/safety/llama_guard/",
    "^src/llama_stack/providers/inline/scoring/basic/",
    "^src/llama_stack/providers/inline/scoring/braintrust/",
    "^src/llama_stack/providers/inline/scoring/llm_as_judge/",
    "^src/llama_stack/providers/remote/agents/sample/",
    "^src/llama_stack/providers/remote/datasetio/huggingface/",
    "^src/llama_stack/providers/remote/datasetio/nvidia/",
    "^src/llama_stack/providers/remote/inference/bedrock/",
    "^src/llama_stack/providers/remote/inference/nvidia/",
    "^src/llama_stack/providers/remote/inference/passthrough/",
    "^src/llama_stack/providers/remote/inference/runpod/",
    "^src/llama_stack/providers/remote/inference/tgi/",
    "^src/llama_stack/providers/remote/inference/watsonx/",
    "^src/llama_stack/providers/remote/safety/bedrock/",
    "^src/llama_stack/providers/remote/safety/nvidia/",
    "^src/llama_stack/providers/remote/safety/sambanova/",
    "^src/llama_stack/providers/remote/safety/sample/",
    "^src/llama_stack/providers/remote/tool_runtime/bing_search/",
    "^src/llama_stack/providers/remote/tool_runtime/brave_search/",
    "^src/llama_stack/providers/remote/tool_runtime/model_context_protocol/",
    "^src/llama_stack/providers/remote/tool_runtime/tavily_search/",
    "^src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/",
    "^src/llama_stack/providers/remote/post_training/nvidia/",
    "^src/llama_stack/providers/remote/vector_io/chroma/",
    "^src/llama_stack/providers/remote/vector_io/milvus/",
    "^src/llama_stack/providers/remote/vector_io/pgvector/",
    "^src/llama_stack/providers/remote/vector_io/qdrant/",
    "^src/llama_stack/providers/remote/vector_io/sample/",
    "^src/llama_stack/providers/remote/vector_io/weaviate/",
    "^src/llama_stack/providers/utils/bedrock/client\\.py$",
    "^src/llama_stack/providers/utils/bedrock/refreshable_boto_session\\.py$",
    "^src/llama_stack/providers/utils/inference/embedding_mixin\\.py$",
    "^src/llama_stack/providers/utils/inference/litellm_openai_mixin\\.py$",
    "^src/llama_stack/providers/utils/inference/model_registry\\.py$",
    "^src/llama_stack/providers/utils/inference/openai_compat\\.py$",
    "^src/llama_stack/providers/utils/inference/prompt_adapter\\.py$",
    "^src/llama_stack/providers/utils/kvstore/kvstore\\.py$",
    "^src/llama_stack/providers/utils/kvstore/postgres/postgres\\.py$",
    "^src/llama_stack/providers/utils/kvstore/redis/redis\\.py$",
    "^src/llama_stack/providers/utils/memory/vector_store\\.py$",
    "^src/llama_stack/providers/utils/scoring/aggregation_utils\\.py$",
    "^src/llama_stack/providers/utils/scoring/base_scoring_fn\\.py$",
    "^src/llama_stack/providers/utils/telemetry/dataset_mixin\\.py$",
    "^src/llama_stack/providers/utils/telemetry/trace_protocol\\.py$",
    "^src/llama_stack/providers/utils/telemetry/tracing\\.py$",
    "^src/llama_stack/strong_typing/auxiliary\\.py$",
    "^src/llama_stack/distributions/template\\.py$",
]

[[tool.mypy.overrides]]
# packages that lack typing annotations, do not have stubs, or are unavailable.
module = ["yaml", "fire"]
module = [
    "yaml",
    "fire",
    "torchtune.*",
    "fairscale.*",
    "torchvision.*",
    "datasets",
    "nest_asyncio",
    "streamlit_option_menu",
    "lmformatenforcer.*",
]
ignore_missing_imports = true

[tool.pydantic-mypy]
@@ -16,7 +16,7 @@ if (( BASH_VERSINFO[0] < 4 )); then
  exit 1
fi

PACKAGE_DIR="${1:-llama_stack}"
PACKAGE_DIR="${1:-src/llama_stack}"

if [ ! -d "$PACKAGE_DIR" ]; then
  echo "ERROR: Package directory '$PACKAGE_DIR' does not exist"
@@ -55,7 +55,7 @@ def process_distro(distro_dir: Path, progress, change_tracker: ChangedPathTracker):
    if template_func := getattr(module, "get_distribution_template", None):
        distro = template_func()

        yaml_output_dir = REPO_ROOT / "llama_stack" / "distributions" / distro.name
        yaml_output_dir = REPO_ROOT / "src" / "llama_stack" / "distributions" / distro.name
        doc_output_dir = REPO_ROOT / "docs/docs/distributions" / f"{distro.distro_type}_distro"
        change_tracker.add_paths(yaml_output_dir, doc_output_dir)
        distro.save_distribution(

@@ -93,7 +93,7 @@ def pre_import_distros(distro_dirs: list[Path]) -> None:


def main():
    distros_dir = REPO_ROOT / "llama_stack" / "distributions"
    distros_dir = REPO_ROOT / "src" / "llama_stack" / "distributions"
    change_tracker = ChangedPathTracker()

    with Progress(
@@ -30,8 +30,10 @@ materialize_telemetry_configs() {
    local otel_cfg="${dest}/otel-collector-config.yaml"
    local prom_cfg="${dest}/prometheus.yml"
    local graf_cfg="${dest}/grafana-datasources.yaml"
    local graf_dash_cfg="${dest}/grafana-dashboards.yaml"
    local dash_json="${dest}/llama-stack-dashboard.json"

    for asset in "$otel_cfg" "$prom_cfg" "$graf_cfg"; do
    for asset in "$otel_cfg" "$prom_cfg" "$graf_cfg" "$graf_dash_cfg" "$dash_json"; do
        if [ -e "$asset" ]; then
            die "Telemetry asset ${asset} already exists; refusing to overwrite"
        fi

@@ -103,6 +105,7 @@ datasources:
    type: prometheus
    access: proxy
    url: http://prometheus:9090
    uid: prometheus
    isDefault: true
    editable: true

@@ -112,6 +115,224 @@ datasources:
    url: http://jaeger:16686
    editable: true
EOF

    cat <<'EOF' > "$graf_dash_cfg"
apiVersion: 1

providers:
  - name: 'Llama Stack'
    orgId: 1
    folder: ''
    type: file
    disableDeletion: false
    updateIntervalSeconds: 10
    allowUiUpdates: true
    options:
      path: /etc/grafana/provisioning/dashboards
EOF

    # Copy the dashboard JSON inline to avoid line-length issues
    cat > "$dash_json" <<'DASHBOARD_JSON'
{
  "annotations": {
    "list": []
  },
  "editable": true,
  "fiscalYearStartMonth": 0,
  "graphTooltip": 0,
  "id": null,
  "links": [],
  "liveNow": false,
  "panels": [
    {
      "datasource": {
        "type": "prometheus",
        "uid": "prometheus"
      },
      "fieldConfig": {
        "defaults": {
          "custom": {
            "drawStyle": "line",
            "lineInterpolation": "linear",
            "showPoints": "auto",
            "fillOpacity": 10
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [{"color": "green", "value": null}]
          }
        }
      },
      "gridPos": {"h": 8, "w": 12, "x": 0, "y": 0},
      "id": 1,
      "options": {
        "legend": {"calcs": [], "displayMode": "table", "placement": "bottom", "showLegend": true},
        "tooltip": {"mode": "multi", "sort": "none"}
      },
      "targets": [
        {
          "datasource": {"type": "prometheus", "uid": "prometheus"},
          "expr": "llama_stack_completion_tokens_total",
          "legendFormat": "{{model_id}} ({{provider_id}})",
          "refId": "A"
        }
      ],
      "title": "Completion Tokens",
      "type": "timeseries"
    },
    {
      "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"},
      "fieldConfig": {
        "defaults": {
          "custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10},
          "mappings": [],
          "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}
        }
      },
      "gridPos": {"h": 8, "w": 12, "x": 12, "y": 0},
      "id": 2,
      "options": {
        "legend": {"calcs": [], "displayMode": "table", "placement": "bottom", "showLegend": true},
        "tooltip": {"mode": "multi", "sort": "none"}
      },
      "targets": [
        {"datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, "expr": "llama_stack_prompt_tokens_total", "legendFormat": "Prompt - {{model_id}}", "refId": "A"},
        {"datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, "expr": "llama_stack_tokens_total", "legendFormat": "Total - {{model_id}}", "refId": "B"}
      ],
      "title": "Prompt & Total Tokens",
      "type": "timeseries"
    },
    {
      "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"},
      "fieldConfig": {
        "defaults": {
          "custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10},
          "mappings": [],
          "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
          "unit": "ms"
        }
      },
      "gridPos": {"h": 8, "w": 12, "x": 0, "y": 8},
      "id": 3,
      "options": {
        "legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true},
        "tooltip": {"mode": "multi", "sort": "none"}
      },
      "targets": [
        {"datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, "expr": "histogram_quantile(0.95, rate(llama_stack_http_server_duration_milliseconds_bucket[5m]))", "legendFormat": "p95", "refId": "A"},
        {"datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, "expr": "histogram_quantile(0.99, rate(llama_stack_http_server_duration_milliseconds_bucket[5m]))", "legendFormat": "p99", "refId": "B"}
      ],
      "title": "HTTP Request Duration (p95, p99)",
      "type": "timeseries"
    },
    {
      "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"},
      "fieldConfig": {
        "defaults": {
          "mappings": [],
          "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}
        }
      },
      "gridPos": {"h": 8, "w": 6, "x": 12, "y": 8},
      "id": 4,
      "options": {
        "colorMode": "value",
        "graphMode": "area",
        "justifyMode": "auto",
        "orientation": "auto",
        "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false},
        "textMode": "auto"
      },
      "targets": [
        {"datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, "expr": "sum(llama_stack_http_server_duration_milliseconds_count)", "refId": "A"}
      ],
      "title": "Total Requests",
      "type": "stat"
    },
    {
      "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"},
      "fieldConfig": {
        "defaults": {
          "mappings": [],
          "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}
        }
      },
      "gridPos": {"h": 8, "w": 6, "x": 18, "y": 8},
      "id": 5,
      "options": {
        "colorMode": "value",
        "graphMode": "none",
        "justifyMode": "auto",
        "orientation": "auto",
        "reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false},
        "textMode": "auto"
      },
      "targets": [
        {"datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, "expr": "sum(llama_stack_http_server_active_requests)", "refId": "A"}
      ],
      "title": "Active Requests",
      "type": "stat"
    },
    {
      "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"},
      "fieldConfig": {
        "defaults": {
          "custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10},
          "mappings": [],
          "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
          "unit": "reqps"
        }
      },
      "gridPos": {"h": 8, "w": 12, "x": 0, "y": 16},
      "id": 6,
      "options": {
        "legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true},
        "tooltip": {"mode": "multi", "sort": "none"}
      },
      "targets": [
        {"datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, "expr": "rate(llama_stack_http_server_duration_milliseconds_count[5m])", "legendFormat": "{{http_target}} - {{http_status_code}}", "refId": "A"}
      ],
      "title": "Request Rate",
      "type": "timeseries"
    },
    {
      "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"},
      "fieldConfig": {
        "defaults": {
          "custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10},
          "mappings": [],
          "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
          "unit": "Bps"
        }
      },
      "gridPos": {"h": 8, "w": 12, "x": 12, "y": 16},
      "id": 7,
      "options": {
        "legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true},
        "tooltip": {"mode": "multi", "sort": "none"}
      },
      "targets": [
        {"datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, "expr": "rate(llama_stack_http_server_request_size_bytes_sum[5m])", "legendFormat": "Request", "refId": "A"},
        {"datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, "expr": "rate(llama_stack_http_server_response_size_bytes_sum[5m])", "legendFormat": "Response", "refId": "B"}
      ],
      "title": "Request/Response Sizes",
      "type": "timeseries"
    }
  ],
  "refresh": "5s",
  "schemaVersion": 38,
  "tags": ["llama-stack"],
  "templating": {"list": []},
  "time": {"from": "now-15m", "to": "now"},
  "timepicker": {},
  "timezone": "browser",
  "title": "Llama Stack Metrics",
  "uid": "llama-stack-metrics",
  "version": 0,
  "weekStart": ""
}
DASHBOARD_JSON
}

# Cleanup function to remove temporary files
@@ -372,6 +593,8 @@ if [ "$WITH_TELEMETRY" = true ]; then
        -e GF_SECURITY_ADMIN_PASSWORD=admin \
        -e GF_USERS_ALLOW_SIGN_UP=false \
        -v "${TELEMETRY_ASSETS_DIR}/grafana-datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:Z" \
        -v "${TELEMETRY_ASSETS_DIR}/grafana-dashboards.yaml:/etc/grafana/provisioning/dashboards/dashboards.yaml:Z" \
        -v "${TELEMETRY_ASSETS_DIR}/llama-stack-dashboard.json:/etc/grafana/provisioning/dashboards/llama-stack-dashboard.json:Z" \
        docker.io/grafana/grafana:11.0.0 > /dev/null 2>&1; then
        die "Grafana startup failed"
    fi
@@ -208,6 +208,15 @@ if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then
    echo "=== Starting Llama Stack Server ==="
    export LLAMA_STACK_LOG_WIDTH=120

    # Configure telemetry collector for server mode
    # Use a fixed port for the OTEL collector so the server can connect to it
    COLLECTOR_PORT=4317
    export LLAMA_STACK_TEST_COLLECTOR_PORT="${COLLECTOR_PORT}"
    export OTEL_EXPORTER_OTLP_ENDPOINT="http://127.0.0.1:${COLLECTOR_PORT}"
    export OTEL_EXPORTER_OTLP_PROTOCOL="http/protobuf"
    export OTEL_BSP_SCHEDULE_DELAY="200"
    export OTEL_BSP_EXPORT_TIMEOUT="2000"

    # remove "server:" from STACK_CONFIG
    stack_config=$(echo "$STACK_CONFIG" | sed 's/^server://')
    nohup llama stack run $stack_config > server.log 2>&1 &

@@ -284,10 +293,15 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
    docker stop "$container_name" 2>/dev/null || true
    docker rm "$container_name" 2>/dev/null || true

    # Configure telemetry collector port shared between host and container
    COLLECTOR_PORT=4317
    export LLAMA_STACK_TEST_COLLECTOR_PORT="${COLLECTOR_PORT}"

    # Build environment variables for docker run
    DOCKER_ENV_VARS=""
    DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_INFERENCE_MODE=$INFERENCE_MODE"
    DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_STACK_CONFIG_TYPE=server"
    DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:${COLLECTOR_PORT}"

    # Pass through API keys if they exist
    [ -n "${TOGETHER_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e TOGETHER_API_KEY=$TOGETHER_API_KEY"

@@ -308,8 +322,20 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
    fi
    echo "Using image: $IMAGE_NAME"

    docker run -d --network host --name "$container_name" \
      -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
    # On macOS/Darwin, --network host doesn't work as expected due to Docker running in a VM
    # Use regular port mapping instead
    NETWORK_MODE=""
    PORT_MAPPINGS=""
    if [[ "$(uname)" != "Darwin" ]] && [[ "$(uname)" != *"MINGW"* ]]; then
        NETWORK_MODE="--network host"
    else
        # On non-Linux (macOS, Windows), need explicit port mappings for both app and telemetry
        PORT_MAPPINGS="-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT -p $COLLECTOR_PORT:$COLLECTOR_PORT"
        echo "Using bridge networking with port mapping (non-Linux)"
    fi

    docker run -d $NETWORK_MODE --name "$container_name" \
      $PORT_MAPPINGS \
      $DOCKER_ENV_VARS \
      "$IMAGE_NAME" \
      --port $LLAMA_STACK_PORT
@@ -6,7 +6,7 @@
# the root directory of this source tree.

set -e
cd llama_stack/ui
cd src/llama_stack/ui

if [ ! -d node_modules ] || [ ! -x node_modules/.bin/prettier ] || [ ! -x node_modules/.bin/eslint ]; then
    echo "UI dependencies not installed, skipping prettier/linter check"
12 scripts/telemetry/grafana-dashboards.yaml Normal file
@@ -0,0 +1,12 @@
apiVersion: 1

providers:
  - name: 'Llama Stack'
    orgId: 1
    folder: ''
    type: file
    disableDeletion: false
    updateIntervalSeconds: 10
    allowUiUpdates: true
    options:
      path: /etc/grafana/provisioning/dashboards
@@ -5,6 +5,7 @@ datasources:
    type: prometheus
    access: proxy
    url: http://prometheus:9090
    uid: prometheus
    isDefault: true
    editable: true
457 scripts/telemetry/llama-stack-dashboard.json Normal file
@@ -0,0 +1,457 @@
{
  "annotations": {
    "list": []
  },
  "editable": true,
  "fiscalYearStartMonth": 0,
  "graphTooltip": 0,
  "id": null,
  "links": [],
  "liveNow": false,
  "panels": [
    {
      "datasource": {
        "type": "prometheus",
        "uid": "prometheus"
      },
      "fieldConfig": {
        "defaults": {
          "custom": {
            "drawStyle": "line",
            "lineInterpolation": "linear",
            "showPoints": "auto",
            "fillOpacity": 10
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              }
            ]
          }
        }
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 0
      },
      "id": 1,
      "options": {
        "legend": {
          "calcs": [],
          "displayMode": "table",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "mode": "multi",
          "sort": "none"
        }
      },
      "targets": [
        {
          "datasource": {
            "type": "prometheus",
            "uid": "prometheus"
          },
          "expr": "llama_stack_completion_tokens_total",
          "legendFormat": "{{model_id}} ({{provider_id}})",
          "refId": "A"
        }
      ],
      "title": "Completion Tokens",
      "type": "timeseries"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "prometheus"
      },
      "fieldConfig": {
        "defaults": {
          "custom": {
            "drawStyle": "line",
            "lineInterpolation": "linear",
            "showPoints": "auto",
            "fillOpacity": 10
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              }
            ]
          }
        }
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 12,
        "y": 0
      },
      "id": 2,
      "options": {
        "legend": {
          "calcs": [],
          "displayMode": "table",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "mode": "multi",
          "sort": "none"
        }
      },
      "targets": [
        {
          "datasource": {
            "type": "prometheus",
            "uid": "prometheus"
          },
          "expr": "llama_stack_prompt_tokens_total",
          "legendFormat": "Prompt - {{model_id}}",
          "refId": "A"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "prometheus"
          },
          "expr": "llama_stack_tokens_total",
          "legendFormat": "Total - {{model_id}}",
          "refId": "B"
        }
      ],
      "title": "Prompt & Total Tokens",
      "type": "timeseries"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "prometheus"
      },
      "fieldConfig": {
        "defaults": {
          "custom": {
            "drawStyle": "line",
            "lineInterpolation": "linear",
            "showPoints": "auto",
            "fillOpacity": 10
          },
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              }
            ]
          },
          "unit": "ms"
        }
      },
      "gridPos": {
        "h": 8,
        "w": 12,
        "x": 0,
        "y": 8
      },
      "id": 3,
      "options": {
        "legend": {
          "calcs": [],
          "displayMode": "list",
          "placement": "bottom",
          "showLegend": true
        },
        "tooltip": {
          "mode": "multi",
          "sort": "none"
        }
      },
      "targets": [
        {
          "datasource": {
            "type": "prometheus",
            "uid": "prometheus"
          },
          "expr": "histogram_quantile(0.95, rate(llama_stack_http_server_duration_milliseconds_bucket[5m]))",
          "legendFormat": "p95",
          "refId": "A"
        },
        {
          "datasource": {
            "type": "prometheus",
            "uid": "prometheus"
          },
          "expr": "histogram_quantile(0.99, rate(llama_stack_http_server_duration_milliseconds_bucket[5m]))",
          "legendFormat": "p99",
          "refId": "B"
        }
      ],
      "title": "HTTP Request Duration (p95, p99)",
      "type": "timeseries"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "prometheus"
      },
      "fieldConfig": {
        "defaults": {
          "mappings": [],
          "thresholds": {
            "mode": "absolute",
            "steps": [
              {
                "color": "green",
                "value": null
              }
            ]
          }
        }
      },
      "gridPos": {
        "h": 8,
        "w": 6,
        "x": 12,
        "y": 8
      },
      "id": 4,
      "options": {
        "colorMode": "value",
        "graphMode": "area",
        "justifyMode": "auto",
        "orientation": "auto",
        "reduceOptions": {
          "calcs": [
            "lastNotNull"
          ],
          "fields": "",
          "values": false
        },
        "textMode": "auto"
      },
      "targets": [
        {
          "datasource": {
            "type": "prometheus",
            "uid": "prometheus"
          },
          "expr": "sum(llama_stack_http_server_duration_milliseconds_count)",
          "refId": "A"
        }
      ],
      "title": "Total Requests",
      "type": "stat"
    },
    {
      "datasource": {
        "type": "prometheus",
        "uid": "prometheus"
      },
      "fieldConfig": {
        "defaults": {
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 6,
|
||||
"x": 18,
|
||||
"y": 8
|
||||
},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "none",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "auto",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "auto"
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"expr": "sum(llama_stack_http_server_active_requests)",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Active Requests",
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"drawStyle": "line",
|
||||
"lineInterpolation": "linear",
|
||||
"showPoints": "auto",
|
||||
"fillOpacity": 10
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "reqps"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 16
|
||||
},
|
||||
"id": 6,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"expr": "rate(llama_stack_http_server_duration_milliseconds_count[5m])",
|
||||
"legendFormat": "{{http_target}} - {{http_status_code}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Request Rate",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {
|
||||
"drawStyle": "line",
|
||||
"lineInterpolation": "linear",
|
||||
"showPoints": "auto",
|
||||
"fillOpacity": 10
|
||||
},
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "Bps"
|
||||
}
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 8,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 16
|
||||
},
|
||||
"id": 7,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "list",
|
||||
"placement": "bottom",
|
||||
"showLegend": true
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "none"
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"expr": "rate(llama_stack_http_server_request_size_bytes_sum[5m])",
|
||||
"legendFormat": "Request",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"expr": "rate(llama_stack_http_server_response_size_bytes_sum[5m])",
|
||||
"legendFormat": "Response",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Request/Response Sizes",
|
||||
"type": "timeseries"
|
||||
}
|
||||
],
|
||||
"refresh": "5s",
|
||||
"schemaVersion": 38,
|
||||
"tags": [
|
||||
"llama-stack"
|
||||
],
|
||||
"templating": {
|
||||
"list": []
|
||||
},
|
||||
"time": {
|
||||
"from": "now-15m",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {},
|
||||
"timezone": "browser",
|
||||
"title": "Llama Stack Metrics",
|
||||
"uid": "llama-stack-metrics",
|
||||
"version": 0,
|
||||
"weekStart": ""
|
||||
}
|
||||
|
|
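The latency panels rely on PromQL's `histogram_quantile`, which linearly interpolates inside cumulative `_bucket` series. A minimal Python sketch of that estimation (the bucket bounds and counts are made-up example data, and the real PromQL function also handles the `+Inf` bucket and per-series aggregation):

def histogram_quantile(q: float, buckets: list[tuple[float, float]]) -> float:
    """Estimate a quantile from cumulative histogram buckets.

    `buckets` is a sorted list of (upper_bound, cumulative_count) pairs,
    mirroring Prometheus `_bucket{le=...}` series. Linearly interpolate
    within the bucket that contains the target rank, as PromQL does.
    """
    total = buckets[-1][1]
    rank = q * total
    prev_bound, prev_count = 0.0, 0.0
    for bound, count in buckets:
        if count >= rank:
            if count == prev_count:
                return bound
            return prev_bound + (bound - prev_bound) * (rank - prev_count) / (count - prev_count)
        prev_bound, prev_count = bound, count
    return buckets[-1][0]

# Example: request durations in ms, cumulative counts per `le` bound.
print(histogram_quantile(0.95, [(50, 60), (100, 90), (250, 99), (500, 100)]))  # ~183.3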
@@ -135,6 +135,8 @@ $CONTAINER_RUNTIME run -d --name grafana \
  -e GF_SECURITY_ADMIN_PASSWORD=admin \
  -e GF_USERS_ALLOW_SIGN_UP=false \
  -v "$SCRIPT_DIR/grafana-datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:Z" \
  -v "$SCRIPT_DIR/grafana-dashboards.yaml:/etc/grafana/provisioning/dashboards/dashboards.yaml:Z" \
  -v "$SCRIPT_DIR/llama-stack-dashboard.json:/etc/grafana/provisioning/dashboards/llama-stack-dashboard.json:Z" \
  docker.io/grafana/grafana:11.0.0

# Wait for services to start
@@ -27,4 +27,4 @@ fi

# Run unit tests with coverage
uv run --python "$PYTHON_VERSION" --with-editable . --group unit \
  coverage run --source=llama_stack -m pytest -s -v tests/unit/ "$@"
  coverage run --source=src/llama_stack -m pytest -s -v tests/unit/ "$@"
@@ -38,6 +38,7 @@ from .openai_responses import (
    OpenAIResponseInputTool,
    OpenAIResponseObject,
    OpenAIResponseObjectStream,
    OpenAIResponsePrompt,
    OpenAIResponseText,
)
@@ -149,13 +150,13 @@
class MemoryRetrievalStep(StepCommon):
    """A memory retrieval step in an agent turn.

    :param vector_db_ids: The IDs of the vector databases to retrieve context from.
    :param vector_store_ids: The IDs of the vector databases to retrieve context from.
    :param inserted_context: The context retrieved from the vector databases.
    """

    step_type: Literal[StepType.memory_retrieval] = StepType.memory_retrieval
    # TODO: should this be List[str]?
    vector_db_ids: str
    vector_store_ids: str
    inserted_context: InterleavedContent
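The `vector_db_ids` to `vector_store_ids` rename is a field-level breaking change for anything that constructs or reads this step. A minimal self-contained mirror of the new shape (the real class extends StepCommon and uses InterleavedContent, which the stand-ins below simplify):

from typing import Literal

from pydantic import BaseModel

class MemoryRetrievalStepSketch(BaseModel):
    # Mirrors the renamed field; the upstream TODO notes it may become list[str].
    step_type: Literal["memory_retrieval"] = "memory_retrieval"
    vector_store_ids: str
    inserted_context: str  # stands in for InterleavedContent here

step = MemoryRetrievalStepSketch(vector_store_ids="vs_123", inserted_context="...")
print(step.model_dump())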
@@ -810,6 +811,7 @@ class Agents(Protocol):
        self,
        input: str | list[OpenAIResponseInput],
        model: str,
        prompt: OpenAIResponsePrompt | None = None,
        instructions: str | None = None,
        previous_response_id: str | None = None,
        conversation: str | None = None,
@@ -831,6 +833,7 @@

        :param input: Input message(s) to create the response.
        :param model: The underlying LLM used for completions.
        :param prompt: (Optional) Prompt object with ID, version, and variables.
        :param previous_response_id: (Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses.
        :param conversation: (Optional) The ID of a conversation to add the response to. Must begin with 'conv_'. Input and output messages will be automatically added to the conversation.
        :param include: (Optional) Additional fields to include in the response.
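Read together with the signature change above, callers can now pin a stored prompt template per response. A hypothetical call, assuming `agents` implements the Agents protocol's `create_openai_response` method and that the types are imported from the agents API; the identifiers and variable payload are illustrative:

async def weekly_summary(agents) -> None:
    response = await agents.create_openai_response(
        input="Summarize this week's activity.",
        model="meta-llama/Llama-3.2-3B-Instruct",
        prompt=OpenAIResponsePrompt(
            id="pmpt_weekly_summary",  # hypothetical prompt template ID
            version="2",
            variables={"tone": OpenAIResponseInputMessageContentText(text="concise")},
        ),
    )
    print(response.status)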
@@ -4,9 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from collections.abc import Sequence
from typing import Annotated, Any, Literal

from pydantic import BaseModel, Field
from pydantic import BaseModel, Field, model_validator
from typing_extensions import TypedDict

from llama_stack.apis.vector_io import SearchRankingOptions as FileSearchRankingOptions
@@ -46,23 +47,66 @@ class OpenAIResponseInputMessageContentImage(BaseModel):

    :param detail: Level of detail for image processing, can be "low", "high", or "auto"
    :param type: Content type identifier, always "input_image"
    :param file_id: (Optional) The ID of the file to be sent to the model.
    :param image_url: (Optional) URL of the image content
    """

    detail: Literal["low"] | Literal["high"] | Literal["auto"] = "auto"
    type: Literal["input_image"] = "input_image"
    # TODO: handle file_id
    file_id: str | None = None
    image_url: str | None = None


# TODO: handle file content types
@json_schema_type
class OpenAIResponseInputMessageContentFile(BaseModel):
    """File content for input messages in OpenAI response format.

    :param type: The type of the input item. Always `input_file`.
    :param file_data: The data of the file to be sent to the model.
    :param file_id: (Optional) The ID of the file to be sent to the model.
    :param file_url: The URL of the file to be sent to the model.
    :param filename: The name of the file to be sent to the model.
    """

    type: Literal["input_file"] = "input_file"
    file_data: str | None = None
    file_id: str | None = None
    file_url: str | None = None
    filename: str | None = None

    @model_validator(mode="after")
    def validate_file_source(self) -> "OpenAIResponseInputMessageContentFile":
        if not any([self.file_data, self.file_id, self.file_url, self.filename]):
            raise ValueError(
                "At least one of 'file_data', 'file_id', 'file_url', or 'filename' must be provided for file content"
            )
        return self


OpenAIResponseInputMessageContent = Annotated[
    OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage,
    OpenAIResponseInputMessageContentText
    | OpenAIResponseInputMessageContentImage
    | OpenAIResponseInputMessageContentFile,
    Field(discriminator="type"),
]
register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent")


@json_schema_type
class OpenAIResponsePrompt(BaseModel):
    """OpenAI compatible Prompt object that is used in OpenAI responses.

    :param id: Unique identifier of the prompt template
    :param variables: Dictionary of variable names to OpenAIResponseInputMessageContent structure for template substitution. The substitution values can either be strings, or other Response input types like images or files.
    :param version: Version number of the prompt to use (defaults to latest if not specified)
    """

    id: str
    variables: dict[str, OpenAIResponseInputMessageContent] | None = None
    version: str | None = None


@json_schema_type
class OpenAIResponseAnnotationFileCitation(BaseModel):
    """File citation annotation for referencing specific files in response content.
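Two pydantic v2 patterns do the work in this hunk: `Field(discriminator="type")` dispatches on the `type` tag, and the `model_validator` then rejects an empty file payload. A minimal self-contained sketch of both, using simplified mirror classes rather than imports from the repo:

from typing import Annotated, Literal

from pydantic import BaseModel, Field, TypeAdapter, ValidationError, model_validator

class TextContent(BaseModel):
    type: Literal["input_text"] = "input_text"
    text: str

class FileContent(BaseModel):
    type: Literal["input_file"] = "input_file"
    file_id: str | None = None
    file_url: str | None = None

    @model_validator(mode="after")
    def validate_file_source(self) -> "FileContent":
        if not any([self.file_id, self.file_url]):
            raise ValueError("at least one file source is required")
        return self

Content = Annotated[TextContent | FileContent, Field(discriminator="type")]
adapter = TypeAdapter(Content)

# The "type" tag selects the branch; the validator then checks the payload.
print(adapter.validate_python({"type": "input_file", "file_id": "file_123"}))
try:
    adapter.validate_python({"type": "input_file"})
except ValidationError as exc:
    print("rejected:", exc.errors()[0]["msg"])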
@@ -159,7 +203,7 @@ class OpenAIResponseMessage(BaseModel):
    scenarios.
    """

    content: str | list[OpenAIResponseInputMessageContent] | list[OpenAIResponseOutputMessageContent]
    content: str | Sequence[OpenAIResponseInputMessageContent] | Sequence[OpenAIResponseOutputMessageContent]
    role: Literal["system"] | Literal["developer"] | Literal["user"] | Literal["assistant"]
    type: Literal["message"] = "message"
@@ -211,10 +255,10 @@ class OpenAIResponseOutputMessageFileSearchToolCall(BaseModel):
    """

    id: str
    queries: list[str]
    queries: Sequence[str]
    status: str
    type: Literal["file_search_call"] = "file_search_call"
    results: list[OpenAIResponseOutputMessageFileSearchToolCallResults] | None = None
    results: Sequence[OpenAIResponseOutputMessageFileSearchToolCallResults] | None = None


@json_schema_type
@@ -538,6 +582,7 @@ class OpenAIResponseObject(BaseModel):
    :param output: List of generated output items (messages, tool calls, etc.)
    :param parallel_tool_calls: Whether tool calls can be executed in parallel
    :param previous_response_id: (Optional) ID of the previous response in a conversation
    :param prompt: (Optional) Reference to a prompt template and its variables.
    :param status: Current status of the response generation
    :param temperature: (Optional) Sampling temperature used for generation
    :param text: Text formatting configuration for the response
@@ -553,16 +598,17 @@
    id: str
    model: str
    object: Literal["response"] = "response"
    output: list[OpenAIResponseOutput]
    output: Sequence[OpenAIResponseOutput]
    parallel_tool_calls: bool = False
    previous_response_id: str | None = None
    prompt: OpenAIResponsePrompt | None = None
    status: str
    temperature: float | None = None
    # Default to text format to avoid breaking the loading of old responses
    # before the field was added. New responses will have this set always.
    text: OpenAIResponseText = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text"))
    top_p: float | None = None
    tools: list[OpenAIResponseTool] | None = None
    tools: Sequence[OpenAIResponseTool] | None = None
    truncation: str | None = None
    usage: OpenAIResponseUsage | None = None
    instructions: str | None = None
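The recurring `list[...]` to `Sequence[...]` switch throughout this file is about variance: `list` is invariant under type checking, so a `list[Subtype]` is rejected where `list[Base]` is declared, while the read-only `Sequence` is covariant and accepts it. A small sketch of the type-checker behavior (class and function names are illustrative):

from collections.abc import Sequence

class Output: ...
class MessageOutput(Output): ...

def takes_list(items: list[Output]) -> None: ...
def takes_seq(items: Sequence[Output]) -> None: ...

messages: list[MessageOutput] = [MessageOutput()]
takes_seq(messages)   # OK: Sequence is covariant
takes_list(messages)  # mypy error: list is invariant (runs fine at runtime)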
@@ -1254,14 +1300,9 @@ class OpenAIResponseInputFunctionToolCallOutput(BaseModel):

OpenAIResponseInput = Annotated[
    # Responses API allows output messages to be passed in as input
    OpenAIResponseOutputMessageWebSearchToolCall
    | OpenAIResponseOutputMessageFileSearchToolCall
    | OpenAIResponseOutputMessageFunctionToolCall
    OpenAIResponseOutput
    | OpenAIResponseInputFunctionToolCallOutput
    | OpenAIResponseMCPApprovalRequest
    | OpenAIResponseMCPApprovalResponse
    | OpenAIResponseOutputMessageMCPCall
    | OpenAIResponseOutputMessageMCPListTools
    | OpenAIResponseMessage,
    Field(union_mode="left_to_right"),
]
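`union_mode="left_to_right"` tells pydantic to try union members in declaration order and keep the first that validates, rather than its default smart matching; with the output-message types listed before plain messages, ambiguous inputs resolve predictably. A minimal sketch of the difference:

from typing import Annotated

from pydantic import Field, TypeAdapter

# Left-to-right: "1" is tried as int first and coerced, so str never wins.
LeftToRight = Annotated[int | str, Field(union_mode="left_to_right")]
print(TypeAdapter(LeftToRight).validate_python("1"))  # -> 1 (int)

# Smart mode (the default) prefers the exact type match instead.
print(TypeAdapter(int | str).validate_python("1"))  # -> "1" (str)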
@@ -1275,7 +1316,7 @@ class ListOpenAIResponseInputItem(BaseModel):
    :param object: Object type identifier, always "list"
    """

    data: list[OpenAIResponseInput]
    data: Sequence[OpenAIResponseInput]
    object: Literal["list"] = "list"
|
@ -1286,7 +1327,7 @@ class OpenAIResponseObjectWithInput(OpenAIResponseObject):
|
|||
:param input: List of input items that led to this response
|
||||
"""
|
||||
|
||||
input: list[OpenAIResponseInput]
|
||||
input: Sequence[OpenAIResponseInput]
|
||||
|
||||
def to_response_object(self) -> OpenAIResponseObject:
|
||||
"""Convert to OpenAIResponseObject by excluding input field."""
|
||||
|
|
@@ -1304,7 +1345,7 @@ class ListOpenAIResponseObject(BaseModel):
    :param object: Object type identifier, always "list"
    """

    data: list[OpenAIResponseObjectWithInput]
    data: Sequence[OpenAIResponseObjectWithInput]
    has_more: bool
    first_id: str
    last_id: str
@@ -21,8 +21,8 @@ from typing_extensions import TypedDict
from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent
from llama_stack.apis.common.responses import Order
from llama_stack.apis.models import Model
from llama_stack.apis.telemetry import MetricResponseMixin
from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
from llama_stack.core.telemetry.telemetry import MetricResponseMixin
from llama_stack.core.telemetry.trace_protocol import trace_protocol
from llama_stack.models.llama.datatypes import (
    BuiltinTool,
|||
|
||||
strategy: SamplingStrategy = Field(default_factory=GreedySamplingStrategy)
|
||||
|
||||
max_tokens: int | None = 0
|
||||
max_tokens: int | None = None
|
||||
repetition_penalty: float | None = 1.0
|
||||
stop: list[str] | None = None
|
||||
|
||||
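Moving the `max_tokens` default from `0` to `None` lets downstream code distinguish "no limit requested" from a literal zero and fall back to a provider default. A minimal sketch of that check (the fallback value and function name are illustrative):

from pydantic import BaseModel

class SamplingParamsSketch(BaseModel):
    max_tokens: int | None = None  # None = let the provider decide

def effective_max_tokens(params: SamplingParamsSketch, provider_default: int = 4096) -> int:
    # With the old default of 0, an unset field was indistinguishable
    # from a deliberate zero-token request.
    return params.max_tokens if params.max_tokens is not None else provider_default

print(effective_max_tokens(SamplingParamsSketch()))               # 4096
print(effective_max_tokens(SamplingParamsSketch(max_tokens=64)))  # 64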
Some files were not shown because too many files have changed in this diff.