Mirror of https://github.com/meta-llama/llama-stack.git (synced 2025-12-03 09:53:45 +00:00)

Merge remote-tracking branch 'upstream/main' into elasticsearch-integration

Commit 2407115ee8: 1050 changed files with 65153 additions and 2821 deletions
.github/actions/install-llama-stack-client/action.yml (vendored, new file, 64 lines)
@@ -0,0 +1,64 @@
name: Install llama-stack-client
description: Install llama-stack-client based on branch context and client-version input

inputs:
  client-version:
    description: 'Client version to install on non-release branches (latest or published). Ignored on release branches.'
    required: false
    default: ""

outputs:
  uv-index-url:
    description: 'UV_INDEX_URL to use (set for release branches)'
    value: ${{ steps.configure.outputs.uv-index-url }}
  uv-extra-index-url:
    description: 'UV_EXTRA_INDEX_URL to use (set for release branches)'
    value: ${{ steps.configure.outputs.uv-extra-index-url }}
  install-after-sync:
    description: 'Whether to install client after uv sync'
    value: ${{ steps.configure.outputs.install-after-sync }}
  install-source:
    description: 'Where to install client from after sync'
    value: ${{ steps.configure.outputs.install-source }}

runs:
  using: "composite"
  steps:
    - name: Configure client installation
      id: configure
      shell: bash
      run: |
        # Determine the branch we're working with
        BRANCH="${{ github.base_ref || github.ref }}"
        BRANCH="${BRANCH#refs/heads/}"

        echo "Working with branch: $BRANCH"

        # On release branches: use test.pypi for uv sync, then install from git
        # On non-release branches: install based on client-version after sync
        if [[ "$BRANCH" =~ ^release-[0-9]+\.[0-9]+\.x$ ]]; then
          echo "Detected release branch: $BRANCH"

          # Check if matching branch exists in client repo
          if ! git ls-remote --exit-code --heads https://github.com/llamastack/llama-stack-client-python.git "$BRANCH" > /dev/null 2>&1; then
            echo "::error::Branch $BRANCH not found in llama-stack-client-python repository"
            echo "::error::Please create the matching release branch in llama-stack-client-python before testing"
            exit 1
          fi

          # Configure to use test.pypi for sync (to resolve RC versions)
          echo "uv-index-url=https://test.pypi.org/simple/" >> $GITHUB_OUTPUT
          echo "uv-extra-index-url=https://pypi.org/simple/" >> $GITHUB_OUTPUT
          echo "install-after-sync=true" >> $GITHUB_OUTPUT
          echo "install-source=git+https://github.com/llamastack/llama-stack-client-python.git@$BRANCH" >> $GITHUB_OUTPUT
        elif [ "${{ inputs.client-version }}" = "latest" ]; then
          # Install from main git after sync
          echo "install-after-sync=true" >> $GITHUB_OUTPUT
          echo "install-source=git+https://github.com/llamastack/llama-stack-client-python.git@main" >> $GITHUB_OUTPUT
        elif [ "${{ inputs.client-version }}" = "published" ]; then
          # Use published version from PyPI (installed by sync)
          echo "install-after-sync=false" >> $GITHUB_OUTPUT
        elif [ -n "${{ inputs.client-version }}" ]; then
          echo "::error::Invalid client-version: ${{ inputs.client-version }}"
          exit 1
        fi
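Note: the composite action above hands its decisions to later steps purely through $GITHUB_OUTPUT, keyed on an anchored bash regex. A minimal local sketch of that classification, runnable outside CI (the BRANCH value here is a made-up example; in CI it is derived from github.base_ref or github.ref):

    # Hypothetical dry run of the branch classification used above.
    BRANCH="release-0.3.x"   # example value, not taken from the diff
    if [[ "$BRANCH" =~ ^release-[0-9]+\.[0-9]+\.x$ ]]; then
      echo "release branch: sync via test.pypi, then install client from git @ $BRANCH"
    else
      echo "non-release branch: honor the client-version input (latest/published)"
    fi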
@@ -94,7 +94,7 @@ runs:
       if: ${{ always() }}
       uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
       with:
-        name: logs-${{ github.run_id }}-${{ github.run_attempt || '' }}-${{ strategy.job-index }}
+        name: logs-${{ github.run_id }}-${{ github.run_attempt || '1' }}-${{ strategy.job-index || github.job }}-${{ github.action }}
         path: |
           *.log
         retention-days: 1
.github/actions/setup-runner/action.yml (vendored, 23 changes)
@@ -18,8 +18,17 @@ runs:
         python-version: ${{ inputs.python-version }}
         version: 0.7.6
 
+    - name: Configure client installation
+      id: client-config
+      uses: ./.github/actions/install-llama-stack-client
+      with:
+        client-version: ${{ inputs.client-version }}
+
     - name: Install dependencies
       shell: bash
+      env:
+        UV_INDEX_URL: ${{ steps.client-config.outputs.uv-index-url }}
+        UV_EXTRA_INDEX_URL: ${{ steps.client-config.outputs.uv-extra-index-url }}
       run: |
         echo "Updating project dependencies via uv sync"
         uv sync --all-groups
@@ -27,16 +36,10 @@ runs:
         echo "Installing ad-hoc dependencies"
         uv pip install faiss-cpu
 
-        # Install llama-stack-client-python based on the client-version input
-        if [ "${{ inputs.client-version }}" = "latest" ]; then
-          echo "Installing latest llama-stack-client-python from main branch"
-          uv pip install git+https://github.com/llamastack/llama-stack-client-python.git@main
-        elif [ "${{ inputs.client-version }}" = "published" ]; then
-          echo "Installing published llama-stack-client-python from PyPI"
-          uv pip install llama-stack-client
-        else
-          echo "Invalid client-version: ${{ inputs.client-version }}"
-          exit 1
-        fi
+        # Install specific client version after sync if needed
+        if [ "${{ steps.client-config.outputs.install-after-sync }}" = "true" ]; then
+          echo "Installing llama-stack-client from: ${{ steps.client-config.outputs.install-source }}"
+          uv pip install ${{ steps.client-config.outputs.install-source }}
+        fi
 
         echo "Installed llama packages"
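Note on the new env wiring, sketched under the assumption that uv's pip-compatible index variables behave as documented: UV_INDEX_URL replaces the default package index and UV_EXTRA_INDEX_URL adds a secondary one, so on release branches the sync step can resolve release-candidate client builds from test.pypi while every other dependency still comes from PyPI. The equivalent invocation in plain shell, outside Actions:

    # Sketch: what the release-branch configuration amounts to locally.
    export UV_INDEX_URL="https://test.pypi.org/simple/"     # primary index (RC builds)
    export UV_EXTRA_INDEX_URL="https://pypi.org/simple/"    # secondary index (everything else)
    uv sync --all-groups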
@@ -42,18 +42,7 @@ runs:
     - name: Build Llama Stack
       shell: bash
       run: |
-        # Install llama-stack-client-python based on the client-version input
-        if [ "${{ inputs.client-version }}" = "latest" ]; then
-          echo "Installing latest llama-stack-client-python from main branch"
-          export LLAMA_STACK_CLIENT_DIR=git+https://github.com/llamastack/llama-stack-client-python.git@main
-        elif [ "${{ inputs.client-version }}" = "published" ]; then
-          echo "Installing published llama-stack-client-python from PyPI"
-          unset LLAMA_STACK_CLIENT_DIR
-        else
-          echo "Invalid client-version: ${{ inputs.client-version }}"
-          exit 1
-        fi
-
+        # Client is already installed by setup-runner (handles both main and release branches)
         echo "Building Llama Stack"
 
         LLAMA_STACK_DIR=. \
.github/workflows/README.md (vendored, 1 change)
@@ -4,6 +4,7 @@ Llama Stack uses GitHub Actions for Continuous Integration (CI). Below is a tabl
 
 | Name | File | Purpose |
 | ---- | ---- | ------- |
+| Backward Compatibility Check | [backward-compat.yml](backward-compat.yml) | Check backward compatibility for run.yaml configs |
 | Update Changelog | [changelog.yml](changelog.yml) | Creates PR for updating the CHANGELOG.md |
 | API Conformance Tests | [conformance.yml](conformance.yml) | Run the API Conformance test suite on the changes. |
 | Installer CI | [install-script-ci.yml](install-script-ci.yml) | Test the installation script |
.github/workflows/backward-compat.yml (vendored, new file, 578 lines)
@@ -0,0 +1,578 @@
name: Backward Compatibility Check

run-name: Check backward compatibility for run.yaml configs

on:
  pull_request:
    branches:
      - main
      - 'release-[0-9]+.[0-9]+.[0-9]+.[0-9]+'
      - 'release-[0-9]+.[0-9]+.[0-9]+'
      - 'release-[0-9]+.[0-9]+'
    paths:
      - 'src/llama_stack/core/datatypes.py'
      - 'src/llama_stack/providers/datatypes.py'
      - 'src/llama_stack/distributions/**/run.yaml'
      - 'tests/backward_compat/**'
      - '.github/workflows/backward-compat.yml'

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  check-main-compatibility:
    name: Check Compatibility with main
    runs-on: ubuntu-latest

    steps:
      - name: Checkout PR branch
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          fetch-depth: 0 # Need full history to access main branch

      - name: Set up Python
        uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
        with:
          python-version: '3.12'

      - name: Install uv
        uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1
        with:
          enable-cache: true

      - name: Install dependencies
        run: |
          uv sync --group dev

      - name: Extract run.yaml files from main branch
        id: extract_configs
        run: |
          # Get list of run.yaml paths from main
          git fetch origin main
          CONFIG_PATHS=$(git ls-tree -r --name-only origin/main | grep "src/llama_stack/distributions/.*/run.yaml$" || true)

          if [ -z "$CONFIG_PATHS" ]; then
            echo "No run.yaml files found in main branch"
            exit 1
          fi

          # Extract all configs to a temp directory
          mkdir -p /tmp/main_configs
          echo "Extracting configs from main branch:"

          while IFS= read -r config_path; do
            if [ -z "$config_path" ]; then
              continue
            fi

            # Extract filename for storage
            filename=$(basename $(dirname "$config_path"))
            echo " - $filename (from $config_path)"

            git show origin/main:"$config_path" > "/tmp/main_configs/${filename}.yaml"
          done <<< "$CONFIG_PATHS"

          echo ""
          echo "Extracted $(ls /tmp/main_configs/*.yaml | wc -l) config files"

      - name: Test all configs from main
        id: test_configs
        continue-on-error: true
        run: |
          # Run pytest once with all configs parameterized
          if COMPAT_TEST_CONFIGS_DIR=/tmp/main_configs uv run pytest tests/backward_compat/test_run_config.py -v; then
            echo "failed=false" >> $GITHUB_OUTPUT
          else
            echo "failed=true" >> $GITHUB_OUTPUT
            exit 1
          fi

      - name: Check for breaking change acknowledgment
        id: check_ack
        if: steps.test_configs.outputs.failed == 'true'
        run: |
          echo "Breaking changes detected. Checking for acknowledgment..."

          # Check PR title for '!:' marker (conventional commits)
          PR_TITLE="${{ github.event.pull_request.title }}"
          if [[ "$PR_TITLE" =~ ^[a-z]+\!: ]]; then
            echo "✓ Breaking change acknowledged in PR title"
            echo "acknowledged=true" >> $GITHUB_OUTPUT
            exit 0
          fi

          # Check commit messages for BREAKING CHANGE:
          if git log origin/main..HEAD --format=%B | grep -q "BREAKING CHANGE:"; then
            echo "✓ Breaking change acknowledged in commit message"
            echo "acknowledged=true" >> $GITHUB_OUTPUT
            exit 0
          fi

          echo "✗ Breaking change NOT acknowledged"
          echo "acknowledged=false" >> $GITHUB_OUTPUT
        env:
          GH_TOKEN: ${{ github.token }}

      - name: Evaluate results
        if: always()
        run: |
          FAILED="${{ steps.test_configs.outputs.failed }}"
          ACKNOWLEDGED="${{ steps.check_ack.outputs.acknowledged }}"

          if [[ "$FAILED" == "true" ]]; then
            if [[ "$ACKNOWLEDGED" == "true" ]]; then
              echo ""
              echo "⚠️ WARNING: Breaking changes detected but acknowledged"
              echo ""
              echo "This PR introduces backward-incompatible changes to run.yaml."
              echo "The changes have been properly acknowledged."
              echo ""
              exit 0 # Pass the check
            else
              echo ""
              echo "❌ ERROR: Breaking changes detected without acknowledgment"
              echo ""
              echo "This PR introduces backward-incompatible changes to run.yaml"
              echo "that will break existing user configurations."
              echo ""
              echo "To acknowledge this breaking change, do ONE of:"
              echo " 1. Add '!:' to your PR title (e.g., 'feat!: change xyz')"
              echo " 2. Add the 'breaking-change' label to this PR"
              echo " 3. Include 'BREAKING CHANGE:' in a commit message"
              echo ""
              exit 1 # Fail the check
            fi
          fi

  test-integration-main:
    name: Run Integration Tests with main Config
    runs-on: ubuntu-latest

    steps:
      - name: Checkout PR branch
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          fetch-depth: 0

      - name: Extract ci-tests run.yaml from main
        run: |
          git fetch origin main
          git show origin/main:src/llama_stack/distributions/ci-tests/run.yaml > /tmp/main-ci-tests-run.yaml
          echo "Extracted ci-tests run.yaml from main branch"

      - name: Setup test environment
        uses: ./.github/actions/setup-test-environment
        with:
          python-version: '3.12'
          client-version: 'latest'
          setup: 'ollama'
          suite: 'base'
          inference-mode: 'replay'

      - name: Run integration tests with main config
        id: test_integration
        continue-on-error: true
        uses: ./.github/actions/run-and-record-tests
        with:
          stack-config: /tmp/main-ci-tests-run.yaml
          setup: 'ollama'
          inference-mode: 'replay'
          suite: 'base'

      - name: Check for breaking change acknowledgment
        id: check_ack
        if: steps.test_integration.outcome == 'failure'
        run: |
          echo "Integration tests failed. Checking for acknowledgment..."

          # Check PR title for '!:' marker (conventional commits)
          PR_TITLE="${{ github.event.pull_request.title }}"
          if [[ "$PR_TITLE" =~ ^[a-z]+\!: ]]; then
            echo "✓ Breaking change acknowledged in PR title"
            echo "acknowledged=true" >> $GITHUB_OUTPUT
            exit 0
          fi

          # Check commit messages for BREAKING CHANGE:
          if git log origin/main..HEAD --format=%B | grep -q "BREAKING CHANGE:"; then
            echo "✓ Breaking change acknowledged in commit message"
            echo "acknowledged=true" >> $GITHUB_OUTPUT
            exit 0
          fi

          echo "✗ Breaking change NOT acknowledged"
          echo "acknowledged=false" >> $GITHUB_OUTPUT
        env:
          GH_TOKEN: ${{ github.token }}

      - name: Evaluate integration test results
        if: always()
        run: |
          TEST_FAILED="${{ steps.test_integration.outcome == 'failure' }}"
          ACKNOWLEDGED="${{ steps.check_ack.outputs.acknowledged }}"

          if [[ "$TEST_FAILED" == "true" ]]; then
            if [[ "$ACKNOWLEDGED" == "true" ]]; then
              echo ""
              echo "⚠️ WARNING: Integration tests failed with main config but acknowledged"
              echo ""
              exit 0 # Pass the check
            else
              echo ""
              echo "❌ ERROR: Integration tests failed with main config without acknowledgment"
              echo ""
              echo "To acknowledge this breaking change, do ONE of:"
              echo " 1. Add '!:' to your PR title (e.g., 'feat!: change xyz')"
              echo " 2. Include 'BREAKING CHANGE:' in a commit message"
              echo ""
              exit 1 # Fail the check
            fi
          fi

  test-integration-release:
    name: Run Integration Tests with Latest Release (Informational)
    runs-on: ubuntu-latest

    steps:
      - name: Checkout PR branch
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          fetch-depth: 0

      - name: Get latest release
        id: get_release
        run: |
          # Get the latest release from GitHub
          LATEST_TAG=$(gh release list --limit 1 --json tagName --jq '.[0].tagName' 2>/dev/null || echo "")

          if [ -z "$LATEST_TAG" ]; then
            echo "No releases found, skipping release compatibility check"
            echo "has_release=false" >> $GITHUB_OUTPUT
            exit 0
          fi

          echo "Latest release: $LATEST_TAG"
          echo "has_release=true" >> $GITHUB_OUTPUT
          echo "tag=$LATEST_TAG" >> $GITHUB_OUTPUT
        env:
          GH_TOKEN: ${{ github.token }}

      - name: Extract ci-tests run.yaml from release
        if: steps.get_release.outputs.has_release == 'true'
        id: extract_config
        run: |
          RELEASE_TAG="${{ steps.get_release.outputs.tag }}"

          # Try with src/ prefix first (newer releases), then without (older releases)
          if git show "$RELEASE_TAG:src/llama_stack/distributions/ci-tests/run.yaml" > /tmp/release-ci-tests-run.yaml 2>/dev/null; then
            echo "Extracted ci-tests run.yaml from release $RELEASE_TAG (src/ path)"
            echo "has_config=true" >> $GITHUB_OUTPUT
          elif git show "$RELEASE_TAG:llama_stack/distributions/ci-tests/run.yaml" > /tmp/release-ci-tests-run.yaml 2>/dev/null; then
            echo "Extracted ci-tests run.yaml from release $RELEASE_TAG (old path)"
            echo "has_config=true" >> $GITHUB_OUTPUT
          else
            echo "::warning::ci-tests/run.yaml not found in release $RELEASE_TAG"
            echo "has_config=false" >> $GITHUB_OUTPUT
          fi

      - name: Setup test environment
        if: steps.get_release.outputs.has_release == 'true' && steps.extract_config.outputs.has_config == 'true'
        uses: ./.github/actions/setup-test-environment
        with:
          python-version: '3.12'
          client-version: 'latest'
          setup: 'ollama'
          suite: 'base'
          inference-mode: 'replay'

      - name: Run integration tests with release config (PR branch)
        id: test_release_pr
        if: steps.get_release.outputs.has_release == 'true' && steps.extract_config.outputs.has_config == 'true'
        continue-on-error: true
        uses: ./.github/actions/run-and-record-tests
        with:
          stack-config: /tmp/release-ci-tests-run.yaml
          setup: 'ollama'
          inference-mode: 'replay'
          suite: 'base'

      - name: Checkout main branch to test baseline
        if: steps.get_release.outputs.has_release == 'true' && steps.extract_config.outputs.has_config == 'true'
        run: |
          git checkout origin/main

      - name: Setup test environment for main
        if: steps.get_release.outputs.has_release == 'true' && steps.extract_config.outputs.has_config == 'true'
        uses: ./.github/actions/setup-test-environment
        with:
          python-version: '3.12'
          client-version: 'latest'
          setup: 'ollama'
          suite: 'base'
          inference-mode: 'replay'

      - name: Run integration tests with release config (main branch)
        id: test_release_main
        if: steps.get_release.outputs.has_release == 'true' && steps.extract_config.outputs.has_config == 'true'
        continue-on-error: true
        uses: ./.github/actions/run-and-record-tests
        with:
          stack-config: /tmp/release-ci-tests-run.yaml
          setup: 'ollama'
          inference-mode: 'replay'
          suite: 'base'

      - name: Report results and post PR comment
        if: always() && steps.get_release.outputs.has_release == 'true' && steps.extract_config.outputs.has_config == 'true'
        run: |
          RELEASE_TAG="${{ steps.get_release.outputs.tag }}"
          PR_OUTCOME="${{ steps.test_release_pr.outcome }}"
          MAIN_OUTCOME="${{ steps.test_release_main.outcome }}"

          if [[ "$PR_OUTCOME" == "failure" && "$MAIN_OUTCOME" == "success" ]]; then
            # NEW breaking change - PR fails but main passes
            echo "::error::🚨 This PR introduces a NEW breaking change!"

            # Check if we already posted a comment (to avoid spam on every push)
            EXISTING_COMMENT=$(gh pr view ${{ github.event.pull_request.number }} --json comments --jq '.comments[] | select(.body | contains("🚨 New Breaking Change Detected") and contains("Integration tests")) | .id' | head -1)

            if [[ -z "$EXISTING_COMMENT" ]]; then
              gh pr comment ${{ github.event.pull_request.number }} --body "## 🚨 New Breaking Change Detected

          **Integration tests against release \`$RELEASE_TAG\` are now failing**

          ⚠️ This PR introduces a breaking change that affects compatibility with the latest release.

          - Users on release \`$RELEASE_TAG\` may not be able to upgrade
          - Existing configurations may break

          The tests pass on \`main\` but fail with this PR's changes.

          > **Note:** This is informational only and does not block merge.
          > Consider whether this breaking change is acceptable for users."
            else
              echo "Comment already exists, skipping to avoid spam"
            fi

            cat >> $GITHUB_STEP_SUMMARY <<EOF
          ## 🚨 NEW Breaking Change Detected

          **Integration tests against release \`$RELEASE_TAG\` FAILED**

          ⚠️ **This PR introduces a NEW breaking change**

          - Tests **PASS** on main branch ✅
          - Tests **FAIL** on PR branch ❌
          - Users on release \`$RELEASE_TAG\` may not be able to upgrade
          - Existing configurations may break

          > **Note:** This is informational only and does not block merge.
          > Consider whether this breaking change is acceptable for users.
          EOF

          elif [[ "$PR_OUTCOME" == "failure" ]]; then
            # Existing breaking change - both PR and main fail
            echo "::warning::Breaking change already exists in main branch"

            cat >> $GITHUB_STEP_SUMMARY <<EOF
          ## ⚠️ Release Compatibility Test Failed (Existing Issue)

          **Integration tests against release \`$RELEASE_TAG\` FAILED**

          - Tests **FAIL** on main branch ❌
          - Tests **FAIL** on PR branch ❌
          - This breaking change already exists in main (not introduced by this PR)

          > **Note:** This is informational only.
          EOF

          else
            # Success - tests pass
            cat >> $GITHUB_STEP_SUMMARY <<EOF
          ## ✅ Release Compatibility Test Passed

          Integration tests against release \`$RELEASE_TAG\` passed successfully.
          This PR maintains compatibility with the latest release.
          EOF
          fi
        env:
          GH_TOKEN: ${{ github.token }}

  check-schema-release-compatibility:
    name: Check Schema Compatibility with Latest Release (Informational)
    runs-on: ubuntu-latest

    steps:
      - name: Checkout PR branch
        uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          fetch-depth: 0

      - name: Set up Python
        uses: actions/setup-python@e797f83bcb11b83ae66e0230d6156d7c80228e7c # v6.0.0
        with:
          python-version: '3.12'

      - name: Install uv
        uses: astral-sh/setup-uv@6b9c6063abd6010835644d4c2e1bef4cf5cd0fca # v6.0.1
        with:
          enable-cache: true

      - name: Install dependencies
        run: |
          uv sync --group dev

      - name: Get latest release
        id: get_release
        run: |
          # Get the latest release from GitHub
          LATEST_TAG=$(gh release list --limit 1 --json tagName --jq '.[0].tagName' 2>/dev/null || echo "")

          if [ -z "$LATEST_TAG" ]; then
            echo "No releases found, skipping release compatibility check"
            echo "has_release=false" >> $GITHUB_OUTPUT
            exit 0
          fi

          echo "Latest release: $LATEST_TAG"
          echo "has_release=true" >> $GITHUB_OUTPUT
          echo "tag=$LATEST_TAG" >> $GITHUB_OUTPUT
        env:
          GH_TOKEN: ${{ github.token }}

      - name: Extract configs from release
        if: steps.get_release.outputs.has_release == 'true'
        id: extract_release_configs
        run: |
          RELEASE_TAG="${{ steps.get_release.outputs.tag }}"

          # Get run.yaml files from the release (try both src/ and old path)
          CONFIG_PATHS=$(git ls-tree -r --name-only "$RELEASE_TAG" | grep "llama_stack/distributions/.*/run.yaml$" || true)

          if [ -z "$CONFIG_PATHS" ]; then
            echo "::warning::No run.yaml files found in release $RELEASE_TAG"
            echo "has_configs=false" >> $GITHUB_OUTPUT
            exit 0
          fi

          # Extract all configs to a temp directory
          mkdir -p /tmp/release_configs
          echo "Extracting configs from release $RELEASE_TAG:"

          while IFS= read -r config_path; do
            if [ -z "$config_path" ]; then
              continue
            fi

            filename=$(basename $(dirname "$config_path"))
            echo " - $filename (from $config_path)"

            git show "$RELEASE_TAG:$config_path" > "/tmp/release_configs/${filename}.yaml" 2>/dev/null || true
          done <<< "$CONFIG_PATHS"

          echo ""
          echo "Extracted $(ls /tmp/release_configs/*.yaml 2>/dev/null | wc -l) config files"
          echo "has_configs=true" >> $GITHUB_OUTPUT

      - name: Test against release configs (PR branch)
        id: test_schema_pr
        if: steps.get_release.outputs.has_release == 'true' && steps.extract_release_configs.outputs.has_configs == 'true'
        continue-on-error: true
        run: |
          RELEASE_TAG="${{ steps.get_release.outputs.tag }}"
          COMPAT_TEST_CONFIGS_DIR=/tmp/release_configs uv run pytest tests/backward_compat/test_run_config.py -v --tb=short

      - name: Checkout main branch to test baseline
        if: steps.get_release.outputs.has_release == 'true' && steps.extract_release_configs.outputs.has_configs == 'true'
        run: |
          git checkout origin/main

      - name: Install dependencies for main
        if: steps.get_release.outputs.has_release == 'true' && steps.extract_release_configs.outputs.has_configs == 'true'
        run: |
          uv sync --group dev

      - name: Test against release configs (main branch)
        id: test_schema_main
        if: steps.get_release.outputs.has_release == 'true' && steps.extract_release_configs.outputs.has_configs == 'true'
        continue-on-error: true
        run: |
          RELEASE_TAG="${{ steps.get_release.outputs.tag }}"
          COMPAT_TEST_CONFIGS_DIR=/tmp/release_configs uv run pytest tests/backward_compat/test_run_config.py -v --tb=short

      - name: Report results and post PR comment
        if: always() && steps.get_release.outputs.has_release == 'true' && steps.extract_release_configs.outputs.has_configs == 'true'
        run: |
          RELEASE_TAG="${{ steps.get_release.outputs.tag }}"
          PR_OUTCOME="${{ steps.test_schema_pr.outcome }}"
          MAIN_OUTCOME="${{ steps.test_schema_main.outcome }}"

          if [[ "$PR_OUTCOME" == "failure" && "$MAIN_OUTCOME" == "success" ]]; then
            # NEW breaking change - PR fails but main passes
            echo "::error::🚨 This PR introduces a NEW schema breaking change!"

            # Check if we already posted a comment (to avoid spam on every push)
            EXISTING_COMMENT=$(gh pr view ${{ github.event.pull_request.number }} --json comments --jq '.comments[] | select(.body | contains("🚨 New Schema Breaking Change Detected")) | .id' | head -1)

            if [[ -z "$EXISTING_COMMENT" ]]; then
              gh pr comment ${{ github.event.pull_request.number }} --body "## 🚨 New Schema Breaking Change Detected

          **Schema validation against release \`$RELEASE_TAG\` is now failing**

          ⚠️ This PR introduces a schema breaking change that affects compatibility with the latest release.

          - Users on release \`$RELEASE_TAG\` will not be able to upgrade
          - Existing run.yaml configurations will fail validation

          The tests pass on \`main\` but fail with this PR's changes.

          > **Note:** This is informational only and does not block merge.
          > Consider whether this breaking change is acceptable for users."
            else
              echo "Comment already exists, skipping to avoid spam"
            fi

            cat >> $GITHUB_STEP_SUMMARY <<EOF
          ## 🚨 NEW Schema Breaking Change Detected

          **Schema validation against release \`$RELEASE_TAG\` FAILED**

          ⚠️ **This PR introduces a NEW schema breaking change**

          - Tests **PASS** on main branch ✅
          - Tests **FAIL** on PR branch ❌
          - Users on release \`$RELEASE_TAG\` will not be able to upgrade
          - Existing run.yaml configurations will fail validation

          > **Note:** This is informational only and does not block merge.
          > Consider whether this breaking change is acceptable for users.
          EOF

          elif [[ "$PR_OUTCOME" == "failure" ]]; then
            # Existing breaking change - both PR and main fail
            echo "::warning::Schema breaking change already exists in main branch"

            cat >> $GITHUB_STEP_SUMMARY <<EOF
          ## ⚠️ Release Schema Compatibility Failed (Existing Issue)

          **Schema validation against release \`$RELEASE_TAG\` FAILED**

          - Tests **FAIL** on main branch ❌
          - Tests **FAIL** on PR branch ❌
          - This schema breaking change already exists in main (not introduced by this PR)

          > **Note:** This is informational only.
          EOF

          else
            # Success - tests pass
            cat >> $GITHUB_STEP_SUMMARY <<EOF
          ## ✅ Release Schema Compatibility Passed

          All run.yaml configs from release \`$RELEASE_TAG\` are compatible.
          This PR maintains backward compatibility with the latest release.
          EOF
          fi
        env:
          GH_TOKEN: ${{ github.token }}
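Note: the schema jobs above only shell out to git and pytest, so the whole main-branch check can be reproduced locally. A sketch using nothing beyond the commands the workflow itself runs (same paths, same env var):

    # Rebuild the main-branch config snapshot and run the compat suite against it.
    git fetch origin main
    mkdir -p /tmp/main_configs
    for p in $(git ls-tree -r --name-only origin/main | grep 'src/llama_stack/distributions/.*/run.yaml$'); do
      git show origin/main:"$p" > "/tmp/main_configs/$(basename "$(dirname "$p")").yaml"
    done
    COMPAT_TEST_CONFIGS_DIR=/tmp/main_configs uv run pytest tests/backward_compat/test_run_config.py -v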
.github/workflows/integration-auth-tests.yml (vendored, 15 changes)
@@ -4,13 +4,17 @@ run-name: Run the integration test suite with Kubernetes authentication
 
 on:
   push:
-    branches: [ main ]
+    branches:
+      - main
+      - 'release-[0-9]+.[0-9]+.x'
   pull_request:
-    branches: [ main ]
+    branches:
+      - main
+      - 'release-[0-9]+.[0-9]+.x'
     paths:
       - 'distributions/**'
-      - 'llama_stack/**'
-      - '!llama_stack/ui/**'
+      - 'src/llama_stack/**'
+      - '!src/llama_stack/ui/**'
       - 'tests/integration/**'
       - 'uv.lock'
       - 'pyproject.toml'
@@ -91,6 +95,9 @@ jobs:
           conversations:
             table_name: openai_conversations
             backend: sql_default
+          prompts:
+            namespace: prompts
+            backend: kv_default
           server:
             port: 8321
           EOF
@@ -4,11 +4,15 @@ run-name: Run the integration test suite with SqlStore
 
 on:
   push:
-    branches: [ main ]
+    branches:
+      - main
+      - 'release-[0-9]+.[0-9]+.x'
   pull_request:
-    branches: [ main ]
+    branches:
+      - main
+      - 'release-[0-9]+.[0-9]+.x'
     paths:
-      - 'llama_stack/providers/utils/sqlstore/**'
+      - 'src/llama_stack/providers/utils/sqlstore/**'
       - 'tests/integration/sqlstore/**'
       - 'uv.lock'
       - 'pyproject.toml'
@@ -64,7 +68,7 @@ jobs:
 
     - name: Upload test logs
       if: ${{ always() }}
-      uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+      uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
       with:
         name: postgres-test-logs-${{ github.run_id }}-${{ github.run_attempt }}-${{ matrix.python-version }}
         path: |
.github/workflows/integration-tests.yml (vendored, 14 changes)
@@ -4,13 +4,17 @@ run-name: Run the integration test suites from tests/integration in replay mode
 
 on:
   push:
-    branches: [ main ]
+    branches:
+      - main
+      - 'release-[0-9]+.[0-9]+.x'
   pull_request:
-    branches: [ main ]
+    branches:
+      - main
+      - 'release-[0-9]+.[0-9]+.x'
     types: [opened, synchronize, reopened]
     paths:
-      - 'llama_stack/**'
-      - '!llama_stack/ui/**'
+      - 'src/llama_stack/**'
+      - '!src/llama_stack/ui/**'
       - 'tests/**'
       - 'uv.lock'
       - 'pyproject.toml'
@@ -47,7 +51,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        client-type: [library, docker]
+        client-type: [library, docker, server]
         # Use Python 3.13 only on nightly schedule (daily latest client test), otherwise use 3.12
         python-version: ${{ github.event.schedule == '0 0 * * *' && fromJSON('["3.12", "3.13"]') || fromJSON('["3.12"]') }}
         client-version: ${{ (github.event.schedule == '0 0 * * *' || github.event.inputs.test-all-client-versions == 'true') && fromJSON('["published", "latest"]') || fromJSON('["latest"]') }}
@@ -4,12 +4,16 @@ run-name: Run the integration test suite with various VectorIO providers
 
 on:
   push:
-    branches: [ main ]
+    branches:
+      - main
+      - 'release-[0-9]+.[0-9]+.x'
   pull_request:
-    branches: [ main ]
+    branches:
+      - main
+      - 'release-[0-9]+.[0-9]+.x'
     paths:
-      - 'llama_stack/**'
-      - '!llama_stack/ui/**'
+      - 'src/llama_stack/**'
+      - '!src/llama_stack/ui/**'
       - 'tests/integration/vector_io/**'
       - 'uv.lock'
       - 'pyproject.toml'
@@ -209,7 +213,7 @@ jobs:
 
     - name: Upload all logs to artifacts
      if: ${{ always() }}
-      uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+      uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
       with:
         name: vector-io-logs-${{ github.run_id }}-${{ github.run_attempt }}-${{ env.SANITIZED_PROVIDER }}-${{ matrix.python-version }}
         path: |
.github/workflows/pre-commit.yml (vendored, 63 changes)
@@ -5,7 +5,9 @@ run-name: Run pre-commit checks
 on:
   pull_request:
   push:
-    branches: [main]
+    branches:
+      - main
+      - 'release-[0-9]+.[0-9]+.x'
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_id || github.ref }}
@@ -41,25 +43,43 @@ jobs:
       with:
         node-version: '20'
         cache: 'npm'
-        cache-dependency-path: 'llama_stack/ui/'
+        cache-dependency-path: 'src/llama_stack/ui/'
 
+    - name: Set up uv
+      uses: astral-sh/setup-uv@2ddd2b9cb38ad8efd50337e8ab201519a34c9f24 # v7.1.1
+
     - name: Install npm dependencies
       run: npm ci
-      working-directory: llama_stack/ui
+      working-directory: src/llama_stack/ui
 
+    - name: Install pre-commit
+      run: python -m pip install pre-commit
+
+    - name: Cache pre-commit
+      uses: actions/cache@0057852bfaa89a56745cba8c7296529d2fc39830 # v4
+      with:
+        path: ~/.cache/pre-commit
+        key: pre-commit-3|${{ env.pythonLocation }}|${{ hashFiles('.pre-commit-config.yaml') }}
+
     - name: Run pre-commit
       id: precommit
-      uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1
-      continue-on-error: true
+      run: |
+        set +e
+        pre-commit run --show-diff-on-failure --color=always --all-files 2>&1 | tee /tmp/precommit.log
+        status=${PIPESTATUS[0]}
+        echo "status=$status" >> $GITHUB_OUTPUT
+        exit 0
       env:
-        SKIP: no-commit-to-branch
+        SKIP: no-commit-to-branch,mypy
         RUFF_OUTPUT_FORMAT: github
 
     - name: Check pre-commit results
-      if: steps.precommit.outcome == 'failure'
+      if: steps.precommit.outputs.status != '0'
       run: |
         echo "::error::Pre-commit hooks failed. Please run 'pre-commit run --all-files' locally and commit the fixes."
-        echo "::warning::Some pre-commit hooks failed. Check the output above for details."
+        echo ""
+        echo "Failed hooks output:"
+        cat /tmp/precommit.log
         exit 1
 
     - name: Debug
@@ -109,3 +129,30 @@ jobs:
         echo "$unstaged_files"
         exit 1
       fi
+
+    - name: Configure client installation
+      id: client-config
+      uses: ./.github/actions/install-llama-stack-client
+
+    - name: Sync dev + type_checking dependencies
+      env:
+        UV_INDEX_URL: ${{ steps.client-config.outputs.uv-index-url }}
+        UV_EXTRA_INDEX_URL: ${{ steps.client-config.outputs.uv-extra-index-url }}
+      run: |
+        uv sync --group dev --group type_checking
+
+        # Install specific client version after sync if needed
+        if [ "${{ steps.client-config.outputs.install-after-sync }}" = "true" ]; then
+          echo "Installing llama-stack-client from: ${{ steps.client-config.outputs.install-source }}"
+          uv pip install ${{ steps.client-config.outputs.install-source }}
+        fi
+
+    - name: Run mypy (full type_checking)
+      run: |
+        set +e
+        uv run --group dev --group type_checking mypy
+        status=$?
+        if [ $status -ne 0 ]; then
+          echo "::error::Full mypy failed. Reproduce locally with 'uv run pre-commit run mypy-full --hook-stage manual --all-files'."
+        fi
+        exit $status
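Note: the rewritten "Run pre-commit" step leans on a bash subtlety worth spelling out. In a pipeline such as cmd | tee, plain $? reports tee's exit status, so the hook result has to be read from the PIPESTATUS array. A stripped-down sketch of the capture-then-gate pattern (some_linter is a placeholder, not a real command):

    set +e                               # keep the step alive even if the linter fails
    some_linter 2>&1 | tee /tmp/out.log  # tee succeeds regardless of the linter's result
    status=${PIPESTATUS[0]}              # exit code of the first pipeline stage, not tee
    echo "status=$status" >> "$GITHUB_OUTPUT"
    exit 0                               # a later step reads the output and fails the job

This is why the follow-up "Check pre-commit results" step can both print the captured log and still fail the job with a clean ::error annotation.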
.github/workflows/precommit-trigger.yml (vendored, 4 changes)
@@ -145,12 +145,12 @@ jobs:
       with:
         node-version: '20'
         cache: 'npm'
-        cache-dependency-path: 'llama_stack/ui/'
+        cache-dependency-path: 'src/llama_stack/ui/'
 
     - name: Install npm dependencies
       if: steps.check_author.outputs.authorized == 'true'
       run: npm ci
-      working-directory: llama_stack/ui
+      working-directory: src/llama_stack/ui
 
     - name: Run pre-commit
       if: steps.check_author.outputs.authorized == 'true'
.github/workflows/providers-build.yml (vendored, 32 changes)
@@ -7,24 +7,24 @@ on:
     branches:
       - main
     paths:
-      - 'llama_stack/cli/stack/build.py'
-      - 'llama_stack/cli/stack/_build.py'
-      - 'llama_stack/core/build.*'
-      - 'llama_stack/core/*.sh'
+      - 'src/llama_stack/cli/stack/build.py'
+      - 'src/llama_stack/cli/stack/_build.py'
+      - 'src/llama_stack/core/build.*'
+      - 'src/llama_stack/core/*.sh'
       - '.github/workflows/providers-build.yml'
-      - 'llama_stack/distributions/**'
+      - 'src/llama_stack/distributions/**'
       - 'pyproject.toml'
       - 'containers/Containerfile'
       - '.dockerignore'
 
   pull_request:
     paths:
-      - 'llama_stack/cli/stack/build.py'
-      - 'llama_stack/cli/stack/_build.py'
-      - 'llama_stack/core/build.*'
-      - 'llama_stack/core/*.sh'
+      - 'src/llama_stack/cli/stack/build.py'
+      - 'src/llama_stack/cli/stack/_build.py'
+      - 'src/llama_stack/core/build.*'
+      - 'src/llama_stack/core/*.sh'
       - '.github/workflows/providers-build.yml'
-      - 'llama_stack/distributions/**'
+      - 'src/llama_stack/distributions/**'
       - 'pyproject.toml'
       - 'containers/Containerfile'
       - '.dockerignore'
@@ -45,7 +45,7 @@ jobs:
     - name: Generate Distribution List
       id: set-matrix
       run: |
-        distros=$(ls llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
+        distros=$(ls src/llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
         echo "distros=$distros" >> "$GITHUB_OUTPUT"
 
   build:
@@ -107,13 +107,13 @@ jobs:
 
     - name: Build container image
       run: |
-        BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "python:3.12-slim"' llama_stack/distributions/ci-tests/build.yaml)
+        BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "python:3.12-slim"' src/llama_stack/distributions/ci-tests/build.yaml)
         docker build . \
           -f containers/Containerfile \
          --build-arg INSTALL_MODE=editable \
           --build-arg DISTRO_NAME=ci-tests \
           --build-arg BASE_IMAGE="$BASE_IMAGE" \
-          --build-arg RUN_CONFIG_PATH=/workspace/llama_stack/distributions/ci-tests/run.yaml \
+          --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/run.yaml \
           -t llama-stack:ci-tests
 
     - name: Inspect the container image entrypoint
@@ -143,17 +143,17 @@ jobs:
       run: |
         yq -i '
           .distribution_spec.container_image = "registry.access.redhat.com/ubi9:latest"
-        ' llama_stack/distributions/ci-tests/build.yaml
+        ' src/llama_stack/distributions/ci-tests/build.yaml
 
     - name: Build UBI9 container image
       run: |
-        BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "registry.access.redhat.com/ubi9:latest"' llama_stack/distributions/ci-tests/build.yaml)
+        BASE_IMAGE=$(yq -r '.distribution_spec.container_image // "registry.access.redhat.com/ubi9:latest"' src/llama_stack/distributions/ci-tests/build.yaml)
         docker build . \
           -f containers/Containerfile \
           --build-arg INSTALL_MODE=editable \
           --build-arg DISTRO_NAME=ci-tests \
           --build-arg BASE_IMAGE="$BASE_IMAGE" \
-          --build-arg RUN_CONFIG_PATH=/workspace/llama_stack/distributions/ci-tests/run.yaml \
+          --build-arg RUN_CONFIG_PATH=/workspace/src/llama_stack/distributions/ci-tests/run.yaml \
           -t llama-stack:ci-tests-ubi9
 
     - name: Inspect UBI9 image
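Note: the matrix-generation one-liner above is dense; unrolled, it lists every distribution's build.yaml path, keeps the parent directory name, and packs the names into a compact JSON array for fromJSON to consume. A worked example with illustrative directory names (the jq flags: -R reads raw lines, -s slurps them into one string, -c emits compact output; split("\n")[:-1] drops the trailing empty element):

    $ ls src/llama_stack/distributions/*/*build.yaml
    src/llama_stack/distributions/ci-tests/build.yaml
    src/llama_stack/distributions/starter/build.yaml
    $ ls src/llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}'
    ci-tests
    starter
    $ ls src/llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]'
    ["ci-tests","starter"]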
.github/workflows/providers-list-deps.yml (vendored, 24 changes)
@@ -7,22 +7,22 @@ on:
     branches:
       - main
     paths:
-      - 'llama_stack/cli/stack/list_deps.py'
-      - 'llama_stack/cli/stack/_list_deps.py'
-      - 'llama_stack/core/build.*'
-      - 'llama_stack/core/*.sh'
+      - 'src/llama_stack/cli/stack/list_deps.py'
+      - 'src/llama_stack/cli/stack/_list_deps.py'
+      - 'src/llama_stack/core/build.*'
+      - 'src/llama_stack/core/*.sh'
       - '.github/workflows/providers-list-deps.yml'
-      - 'llama_stack/templates/**'
+      - 'src/llama_stack/templates/**'
       - 'pyproject.toml'
 
   pull_request:
     paths:
-      - 'llama_stack/cli/stack/list_deps.py'
-      - 'llama_stack/cli/stack/_list_deps.py'
-      - 'llama_stack/core/build.*'
-      - 'llama_stack/core/*.sh'
+      - 'src/llama_stack/cli/stack/list_deps.py'
+      - 'src/llama_stack/cli/stack/_list_deps.py'
+      - 'src/llama_stack/core/build.*'
+      - 'src/llama_stack/core/*.sh'
       - '.github/workflows/providers-list-deps.yml'
-      - 'llama_stack/templates/**'
+      - 'src/llama_stack/templates/**'
       - 'pyproject.toml'
 
 concurrency:
@@ -41,7 +41,7 @@ jobs:
     - name: Generate Distribution List
       id: set-matrix
       run: |
-        distros=$(ls llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
+        distros=$(ls src/llama_stack/distributions/*/*build.yaml | awk -F'/' '{print $(NF-1)}' | jq -R -s -c 'split("\n")[:-1]')
         echo "distros=$distros" >> "$GITHUB_OUTPUT"
 
   list-deps:
@@ -102,4 +102,4 @@ jobs:
       USE_COPY_NOT_MOUNT: "true"
       LLAMA_STACK_DIR: "."
     run: |
-      uv run llama stack list-deps llama_stack/distributions/ci-tests/build.yaml
+      uv run llama stack list-deps src/llama_stack/distributions/ci-tests/build.yaml
.github/workflows/python-build-test.yml (vendored, 4 changes)
@@ -10,7 +10,7 @@ on:
     branches:
       - main
     paths-ignore:
-      - 'llama_stack/ui/**'
+      - 'src/llama_stack/ui/**'
 
 jobs:
   build:
@@ -24,7 +24,7 @@ jobs:
       uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
 
     - name: Install uv
-      uses: astral-sh/setup-uv@3259c6206f993105e3a61b142c2d97bf4b9ef83d # v7.1.0
+      uses: astral-sh/setup-uv@2ddd2b9cb38ad8efd50337e8ab201519a34c9f24 # v7.1.1
       with:
         python-version: ${{ matrix.python-version }}
         activate-environment: true
@@ -8,7 +8,7 @@ on:
   pull_request:
     branches: [ main ]
     paths:
-      - 'llama_stack/**'
+      - 'src/llama_stack/**'
       - 'tests/integration/**'
       - 'uv.lock'
       - 'pyproject.toml'
@@ -78,7 +78,7 @@ jobs:
 
     - name: Upload all logs to artifacts
       if: ${{ always() }}
-      uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+      uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
       with:
         name: logs-${{ github.run_id }}-${{ github.run_attempt }}-external-provider-module-test
         path: |
.github/workflows/test-external.yml (vendored, 6 changes)
@@ -8,8 +8,8 @@ on:
   pull_request:
     branches: [ main ]
     paths:
-      - 'llama_stack/**'
-      - '!llama_stack/ui/**'
+      - 'src/llama_stack/**'
+      - '!src/llama_stack/ui/**'
       - 'tests/integration/**'
       - 'uv.lock'
       - 'pyproject.toml'
@@ -84,7 +84,7 @@ jobs:
 
     - name: Upload all logs to artifacts
       if: ${{ always() }}
-      uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+      uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
       with:
         name: logs-${{ github.run_id }}-${{ github.run_attempt }}-external-test
         path: |
.github/workflows/ui-unit-tests.yml (vendored, 12 changes)
@@ -8,7 +8,7 @@ on:
   pull_request:
     branches: [ main ]
     paths:
-      - 'llama_stack/ui/**'
+      - 'src/llama_stack/ui/**'
       - '.github/workflows/ui-unit-tests.yml' # This workflow
   workflow_dispatch:
 
@@ -33,22 +33,22 @@ jobs:
       with:
         node-version: ${{ matrix.node-version }}
         cache: 'npm'
-        cache-dependency-path: 'llama_stack/ui/package-lock.json'
+        cache-dependency-path: 'src/llama_stack/ui/package-lock.json'
 
     - name: Install dependencies
-      working-directory: llama_stack/ui
+      working-directory: src/llama_stack/ui
       run: npm ci
 
     - name: Run linting
-      working-directory: llama_stack/ui
+      working-directory: src/llama_stack/ui
       run: npm run lint
 
     - name: Run format check
-      working-directory: llama_stack/ui
+      working-directory: src/llama_stack/ui
       run: npm run format:check
 
     - name: Run unit tests
-      working-directory: llama_stack/ui
+      working-directory: src/llama_stack/ui
       env:
         CI: true
.github/workflows/unit-tests.yml (vendored, 14 changes)
@@ -4,12 +4,16 @@ run-name: Run the unit test suite
 
 on:
   push:
-    branches: [ main ]
+    branches:
+      - main
+      - 'release-[0-9]+.[0-9]+.x'
   pull_request:
-    branches: [ main ]
+    branches:
+      - main
+      - 'release-[0-9]+.[0-9]+.x'
     paths:
-      - 'llama_stack/**'
-      - '!llama_stack/ui/**'
+      - 'src/llama_stack/**'
+      - '!src/llama_stack/ui/**'
       - 'tests/unit/**'
       - 'uv.lock'
       - 'pyproject.toml'
@@ -45,7 +49,7 @@ jobs:
 
     - name: Upload test results
       if: always()
-      uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
+      uses: actions/upload-artifact@330a01c490aca151604b8cf639adc76d48f6c5d4 # v5.0.0
       with:
         name: test-results-${{ matrix.python }}
         path: |
3  .gitignore  vendored

@@ -32,3 +32,6 @@ CLAUDE.md
 docs/.docusaurus/
 docs/node_modules/
 docs/static/imported-files/
+docs/docs/api-deprecated/
+docs/docs/api-experimental/
+docs/docs/api/
@@ -42,7 +42,7 @@ repos:
     hooks:
       - id: ruff
         args: [ --fix ]
-        exclude: ^llama_stack/strong_typing/.*$
+        exclude: ^src/llama_stack/strong_typing/.*$
       - id: ruff-format

   - repo: https://github.com/adamchainz/blacken-docs

@@ -58,18 +58,27 @@ repos:
       - id: uv-lock

   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.16.1
+    rev: v1.18.2
     hooks:
       - id: mypy
         additional_dependencies:
           - uv==0.6.2
-          - mypy
           - pytest
           - rich
           - types-requests
           - pydantic
+          - httpx
         pass_filenames: false

+  - repo: local
+    hooks:
+      - id: mypy-full
+        name: mypy (full type_checking)
+        entry: uv run --group dev --group type_checking mypy
+        language: system
+        pass_filenames: false
+        stages: [manual]

   # - repo: https://github.com/tcort/markdown-link-check
   #   rev: v3.11.2
   #   hooks:

@@ -86,7 +95,7 @@ repos:
         language: python
         pass_filenames: false
         require_serial: true
-        files: ^llama_stack/distributions/.*$|^llama_stack/providers/.*/inference/.*/models\.py$
+        files: ^src/llama_stack/distributions/.*$|^src/llama_stack/providers/.*/inference/.*/models\.py$
       - id: provider-codegen
         name: Provider Codegen
         additional_dependencies:

@@ -95,7 +104,7 @@ repos:
         language: python
         pass_filenames: false
         require_serial: true
-        files: ^llama_stack/providers/.*$
+        files: ^src/llama_stack/providers/.*$
       - id: openapi-codegen
         name: API Spec Codegen
         additional_dependencies:

@@ -104,7 +113,7 @@ repos:
         language: python
         pass_filenames: false
         require_serial: true
-        files: ^llama_stack/apis/|^docs/openapi_generator/
+        files: ^src/llama_stack/apis/|^docs/openapi_generator/
       - id: check-workflows-use-hashes
         name: Check GitHub Actions use SHA-pinned actions
         entry: ./scripts/check-workflows-use-hashes.sh

@@ -120,7 +129,7 @@ repos:
         pass_filenames: false
         require_serial: true
         always_run: true
-        files: ^llama_stack/.*$
+        files: ^src/llama_stack/.*$
       - id: forbid-pytest-asyncio
         name: Block @pytest.mark.asyncio and @pytest_asyncio.fixture
         entry: bash

@@ -150,10 +159,9 @@ repos:
         name: Format & Lint UI
         entry: bash ./scripts/run-ui-linter.sh
         language: system
-        files: ^llama_stack/ui/.*\.(ts|tsx)$
+        files: ^src/llama_stack/ui/.*\.(ts|tsx)$
         pass_filenames: false
         require_serial: true

       - id: check-log-usage
         name: Ensure 'llama_stack.log' usage for logging
         entry: bash

@@ -172,7 +180,23 @@ repos:
             exit 1
           fi
           exit 0
+      - id: fips-compliance
+        name: Ensure llama-stack remains FIPS compliant
+        entry: bash
+        language: system
+        types: [python]
+        pass_filenames: true
+        exclude: '^tests/.*$'  # Exclude test dir as some safety tests used MD5
+        args:
+          - -c
+          - |
+            grep -EnH '^[^#]*\b(md5|sha1|uuid3|uuid5)\b' "$@" && {
+              echo;
+              echo "❌ Do not use any of the following functions: hashlib.md5, hashlib.sha1, uuid.uuid3, uuid.uuid5"
+              echo "   These functions are not FIPS-compliant"
+              echo;
+              exit 1;
+            } || true

 ci:
   autofix_commit_msg: 🎨 [pre-commit.ci] Auto format from pre-commit.com hooks
   autoupdate_commit_msg: ⬆ [pre-commit.ci] pre-commit autoupdate
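The new `fips-compliance` hook rejects any non-comment use of `md5`, `sha1`, `uuid3`, or `uuid5`. As an editorial illustration (not part of the diff), a minimal sketch of the stdlib replacements that pass the check:

```python
import hashlib
import uuid

# FIPS-approved digest instead of hashlib.md5 / hashlib.sha1.
digest = hashlib.sha256(b"payload").hexdigest()

# Random UUIDs instead of the MD5/SHA-1 based uuid.uuid3 / uuid.uuid5.
identifier = uuid.uuid4()

print(digest, identifier)
```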
@@ -61,6 +61,18 @@ uv run pre-commit run --all-files -v

 The `-v` (verbose) parameter is optional but often helpful for getting more information about any issues that the pre-commit checks identify.

+To run the expanded mypy configuration that CI enforces, use:
+
+```bash
+uv run pre-commit run mypy-full --hook-stage manual --all-files
+```
+
+or invoke mypy directly with all optional dependencies:
+
+```bash
+uv run --group dev --group type_checking mypy
+```
+
 ```{caution}
 Before pushing your changes, make sure that the pre-commit hooks have passed successfully.
 ```
18  MANIFEST.in

@@ -1,11 +1,11 @@
 include pyproject.toml
-include llama_stack/models/llama/llama3/tokenizer.model
-include llama_stack/models/llama/llama4/tokenizer.model
-include llama_stack/core/*.sh
-include llama_stack/cli/scripts/*.sh
-include llama_stack/distributions/*/*.yaml
-exclude llama_stack/distributions/ci-tests
+include src/llama_stack/models/llama/llama3/tokenizer.model
+include src/llama_stack/models/llama/llama4/tokenizer.model
+include src/llama_stack/core/*.sh
+include src/llama_stack/cli/scripts/*.sh
+include src/llama_stack/distributions/*/*.yaml
+exclude src/llama_stack/distributions/ci-tests
 include tests/integration/test_cases/inference/*.json
-include llama_stack/models/llama/*/*.md
-include llama_stack/tests/integration/*.jpg
-prune llama_stack/distributions/ci-tests
+include src/llama_stack/models/llama/*/*.md
+include src/llama_stack/tests/integration/*.jpg
+prune src/llama_stack/distributions/ci-tests
@@ -44,14 +44,6 @@ data:
           db: ${env.POSTGRES_DB:=llamastack}
           user: ${env.POSTGRES_USER:=llamastack}
           password: ${env.POSTGRES_PASSWORD:=llamastack}
-      files:
-      - provider_id: meta-reference-files
-        provider_type: inline::localfs
-        config:
-          storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
-          metadata_store:
-            type: sqlite
-            db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/starter}/files_metadata.db
       safety:
       - provider_id: llama-guard
         provider_type: inline::llama-guard

@@ -115,13 +107,21 @@ data:
           db: ${env.POSTGRES_DB:=llamastack}
           user: ${env.POSTGRES_USER:=llamastack}
           password: ${env.POSTGRES_PASSWORD:=llamastack}
-    references:
+    stores:
       metadata:
         backend: kv_default
         namespace: registry
       inference:
         backend: sql_default
         table_name: inference_store
+        max_write_queue_size: 10000
+        num_writers: 4
+      conversations:
+        backend: sql_default
+        table_name: openai_conversations
+      prompts:
+        backend: kv_default
+        namespace: prompts
     models:
     - metadata:
         embedding_dimension: 768
@@ -36,14 +36,6 @@ providers:
       persistence:
         namespace: vector_io::chroma_remote
         backend: kv_default
-  files:
-  - provider_id: meta-reference-files
-    provider_type: inline::localfs
-    config:
-      storage_dir: ${env.FILES_STORAGE_DIR:=~/.llama/distributions/starter/files}
-      metadata_store:
-        table_name: files_metadata
-        backend: sql_default
   safety:
   - provider_id: llama-guard
     provider_type: inline::llama-guard

@@ -108,6 +100,9 @@ storage:
   conversations:
     table_name: openai_conversations
     backend: sql_default
+  prompts:
+    namespace: prompts
+    backend: kv_default
 registered_resources:
   models:
   - metadata:
@@ -1,610 +0,0 @@
-# yaml-language-server: $schema=https://app.stainlessapi.com/config-internal.schema.json
-
-organization:
-  # Name of your organization or company, used to determine the name of the client
-  # and headings.
-  name: llama-stack-client
-  docs: https://llama-stack.readthedocs.io/en/latest/
-  contact: llamastack@meta.com
-security:
-  - {}
-  - BearerAuth: []
-security_schemes:
-  BearerAuth:
-    type: http
-    scheme: bearer
-# `targets` define the output targets and their customization options, such as
-# whether to emit the Node SDK and what it's package name should be.
-targets:
-  node:
-    package_name: llama-stack-client
-    production_repo: llamastack/llama-stack-client-typescript
-    publish:
-      npm: false
-  python:
-    package_name: llama_stack_client
-    production_repo: llamastack/llama-stack-client-python
-    options:
-      use_uv: true
-    publish:
-      pypi: true
-    project_name: llama_stack_client
-  kotlin:
-    reverse_domain: com.llama_stack_client.api
-    production_repo: null
-    publish:
-      maven: false
-  go:
-    package_name: llama-stack-client
-    production_repo: llamastack/llama-stack-client-go
-    options:
-      enable_v2: true
-      back_compat_use_shared_package: false
-
-# `client_settings` define settings for the API client, such as extra constructor
-# arguments (used for authentication), retry behavior, idempotency, etc.
-client_settings:
-  default_env_prefix: LLAMA_STACK_CLIENT
-  opts:
-    api_key:
-      type: string
-      read_env: LLAMA_STACK_CLIENT_API_KEY
-      auth: { security_scheme: BearerAuth }
-      nullable: true
-
-# `environments` are a map of the name of the environment (e.g. "sandbox",
-# "production") to the corresponding url to use.
-environments:
-  production: http://any-hosted-llama-stack.com
-
-# `pagination` defines [pagination schemes] which provides a template to match
-# endpoints and generate next-page and auto-pagination helpers in the SDKs.
-pagination:
-  - name: datasets_iterrows
-    type: offset
-    request:
-      dataset_id:
-        type: string
-      start_index:
-        type: integer
-        x-stainless-pagination-property:
-          purpose: offset_count_param
-      limit:
-        type: integer
-    response:
-      data:
-        type: array
-        items:
-          type: object
-      next_index:
-        type: integer
-        x-stainless-pagination-property:
-          purpose: offset_count_start_field
-  - name: openai_cursor_page
-    type: cursor
-    request:
-      limit:
-        type: integer
-      after:
-        type: string
-        x-stainless-pagination-property:
-          purpose: next_cursor_param
-    response:
-      data:
-        type: array
-        items: {}
-      has_more:
-        type: boolean
-      last_id:
-        type: string
-        x-stainless-pagination-property:
-          purpose: next_cursor_field
-# `resources` define the structure and organziation for your API, such as how
-# methods and models are grouped together and accessed. See the [configuration
-# guide] for more information.
-#
-# [configuration guide]:
-# https://app.stainlessapi.com/docs/guides/configure#resources
-resources:
-  $shared:
-    models:
-      agent_config: AgentConfig
-      interleaved_content_item: InterleavedContentItem
-      interleaved_content: InterleavedContent
-      param_type: ParamType
-      safety_violation: SafetyViolation
-      sampling_params: SamplingParams
-      scoring_result: ScoringResult
-      message: Message
-      user_message: UserMessage
-      completion_message: CompletionMessage
-      tool_response_message: ToolResponseMessage
-      system_message: SystemMessage
-      tool_call: ToolCall
-      query_result: RAGQueryResult
-      document: RAGDocument
-      query_config: RAGQueryConfig
-      response_format: ResponseFormat
-  toolgroups:
-    models:
-      tool_group: ToolGroup
-      list_tool_groups_response: ListToolGroupsResponse
-    methods:
-      register: post /v1/toolgroups
-      get: get /v1/toolgroups/{toolgroup_id}
-      list: get /v1/toolgroups
-      unregister: delete /v1/toolgroups/{toolgroup_id}
-  tools:
-    methods:
-      get: get /v1/tools/{tool_name}
-      list:
-        endpoint: get /v1/tools
-        paginated: false
-
-  tool_runtime:
-    models:
-      tool_def: ToolDef
-      tool_invocation_result: ToolInvocationResult
-    methods:
-      list_tools:
-        endpoint: get /v1/tool-runtime/list-tools
-        paginated: false
-      invoke_tool: post /v1/tool-runtime/invoke
-    subresources:
-      rag_tool:
-        methods:
-          insert: post /v1/tool-runtime/rag-tool/insert
-          query: post /v1/tool-runtime/rag-tool/query
-
-  responses:
-    models:
-      response_object_stream: OpenAIResponseObjectStream
-      response_object: OpenAIResponseObject
-    methods:
-      create:
-        type: http
-        endpoint: post /v1/responses
-        streaming:
-          stream_event_model: responses.response_object_stream
-          param_discriminator: stream
-      retrieve: get /v1/responses/{response_id}
-      list:
-        type: http
-        endpoint: get /v1/responses
-      delete:
-        type: http
-        endpoint: delete /v1/responses/{response_id}
-    subresources:
-      input_items:
-        methods:
-          list:
-            type: http
-            endpoint: get /v1/responses/{response_id}/input_items
-
-  conversations:
-    models:
-      conversation_object: Conversation
-    methods:
-      create:
-        type: http
-        endpoint: post /v1/conversations
-      retrieve: get /v1/conversations/{conversation_id}
-      update:
-        type: http
-        endpoint: post /v1/conversations/{conversation_id}
-      delete:
-        type: http
-        endpoint: delete /v1/conversations/{conversation_id}
-    subresources:
-      items:
-        methods:
-          get:
-            type: http
-            endpoint: get /v1/conversations/{conversation_id}/items/{item_id}
-          list:
-            type: http
-            endpoint: get /v1/conversations/{conversation_id}/items
-          create:
-            type: http
-            endpoint: post /v1/conversations/{conversation_id}/items
-
-  inspect:
-    models:
-      healthInfo: HealthInfo
-      providerInfo: ProviderInfo
-      routeInfo: RouteInfo
-      versionInfo: VersionInfo
-    methods:
-      health: get /v1/health
-      version: get /v1/version
-
-  embeddings:
-    models:
-      create_embeddings_response: OpenAIEmbeddingsResponse
-    methods:
-      create: post /v1/embeddings
-
-  chat:
-    models:
-      chat_completion_chunk: OpenAIChatCompletionChunk
-    subresources:
-      completions:
-        methods:
-          create:
-            type: http
-            endpoint: post /v1/chat/completions
-            streaming:
-              stream_event_model: chat.chat_completion_chunk
-              param_discriminator: stream
-          list:
-            type: http
-            endpoint: get /v1/chat/completions
-          retrieve:
-            type: http
-            endpoint: get /v1/chat/completions/{completion_id}
-  completions:
-    methods:
-      create:
-        type: http
-        endpoint: post /v1/completions
-        streaming:
-          param_discriminator: stream
-
-  vector_io:
-    models:
-      queryChunksResponse: QueryChunksResponse
-    methods:
-      insert: post /v1/vector-io/insert
-      query: post /v1/vector-io/query
-
-  vector_stores:
-    models:
-      vector_store: VectorStoreObject
-      list_vector_stores_response: VectorStoreListResponse
-      vector_store_delete_response: VectorStoreDeleteResponse
-      vector_store_search_response: VectorStoreSearchResponsePage
-    methods:
-      create: post /v1/vector_stores
-      list:
-        endpoint: get /v1/vector_stores
-      retrieve: get /v1/vector_stores/{vector_store_id}
-      update: post /v1/vector_stores/{vector_store_id}
-      delete: delete /v1/vector_stores/{vector_store_id}
-      search: post /v1/vector_stores/{vector_store_id}/search
-    subresources:
-      files:
-        models:
-          vector_store_file: VectorStoreFileObject
-        methods:
-          list: get /v1/vector_stores/{vector_store_id}/files
-          retrieve: get /v1/vector_stores/{vector_store_id}/files/{file_id}
-          update: post /v1/vector_stores/{vector_store_id}/files/{file_id}
-          delete: delete /v1/vector_stores/{vector_store_id}/files/{file_id}
-          create: post /v1/vector_stores/{vector_store_id}/files
-          content: get /v1/vector_stores/{vector_store_id}/files/{file_id}/content
-      file_batches:
-        models:
-          vector_store_file_batches: VectorStoreFileBatchObject
-          list_vector_store_files_in_batch_response: VectorStoreFilesListInBatchResponse
-        methods:
-          create: post /v1/vector_stores/{vector_store_id}/file_batches
-          retrieve: get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}
-          list_files: get /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/files
-          cancel: post /v1/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel
-
-  models:
-    models:
-      model: Model
-      list_models_response: ListModelsResponse
-    methods:
-      retrieve: get /v1/models/{model_id}
-      list:
-        endpoint: get /v1/models
-        paginated: false
-      register: post /v1/models
-      unregister: delete /v1/models/{model_id}
-    subresources:
-      openai:
-        methods:
-          list:
-            endpoint: get /v1/models
-            paginated: false
-
-  providers:
-    models:
-      list_providers_response: ListProvidersResponse
-    methods:
-      list:
-        endpoint: get /v1/providers
-        paginated: false
-      retrieve: get /v1/providers/{provider_id}
-
-  routes:
-    models:
-      list_routes_response: ListRoutesResponse
-    methods:
-      list:
-        endpoint: get /v1/inspect/routes
-        paginated: false
-
-  moderations:
-    models:
-      create_response: ModerationObject
-    methods:
-      create: post /v1/moderations
-
-  safety:
-    models:
-      run_shield_response: RunShieldResponse
-    methods:
-      run_shield: post /v1/safety/run-shield
-
-  shields:
-    models:
-      shield: Shield
-      list_shields_response: ListShieldsResponse
-    methods:
-      retrieve: get /v1/shields/{identifier}
-      list:
-        endpoint: get /v1/shields
-        paginated: false
-      register: post /v1/shields
-      delete: delete /v1/shields/{identifier}
-
-  synthetic_data_generation:
-    models:
-      syntheticDataGenerationResponse: SyntheticDataGenerationResponse
-    methods:
-      generate: post /v1/synthetic-data-generation/generate
-
-  telemetry:
-    models:
-      span_with_status: SpanWithStatus
-      trace: Trace
-      query_spans_response: QuerySpansResponse
-      event: Event
-      query_condition: QueryCondition
-    methods:
-      query_traces:
-        endpoint: post /v1alpha/telemetry/traces
-        skip_test_reason: 'unsupported query params in java / kotlin'
-      get_span_tree: post /v1alpha/telemetry/spans/{span_id}/tree
-      query_spans:
-        endpoint: post /v1alpha/telemetry/spans
-        skip_test_reason: 'unsupported query params in java / kotlin'
-      query_metrics:
-        endpoint: post /v1alpha/telemetry/metrics/{metric_name}
-        skip_test_reason: 'unsupported query params in java / kotlin'
-      # log_event: post /v1alpha/telemetry/events
-      save_spans_to_dataset: post /v1alpha/telemetry/spans/export
-      get_span: get /v1alpha/telemetry/traces/{trace_id}/spans/{span_id}
-      get_trace: get /v1alpha/telemetry/traces/{trace_id}
-
-  scoring:
-    methods:
-      score: post /v1/scoring/score
-      score_batch: post /v1/scoring/score-batch
-  scoring_functions:
-    methods:
-      retrieve: get /v1/scoring-functions/{scoring_fn_id}
-      list:
-        endpoint: get /v1/scoring-functions
-        paginated: false
-      register: post /v1/scoring-functions
-    models:
-      scoring_fn: ScoringFn
-      scoring_fn_params: ScoringFnParams
-      list_scoring_functions_response: ListScoringFunctionsResponse
-
-  benchmarks:
-    methods:
-      retrieve: get /v1alpha/eval/benchmarks/{benchmark_id}
-      list:
-        endpoint: get /v1alpha/eval/benchmarks
-        paginated: false
-      register: post /v1alpha/eval/benchmarks
-    models:
-      benchmark: Benchmark
-      list_benchmarks_response: ListBenchmarksResponse
-
-  files:
-    methods:
-      create: post /v1/files
-      list: get /v1/files
-      retrieve: get /v1/files/{file_id}
-      delete: delete /v1/files/{file_id}
-      content: get /v1/files/{file_id}/content
-    models:
-      file: OpenAIFileObject
-      list_files_response: ListOpenAIFileResponse
-      delete_file_response: OpenAIFileDeleteResponse
-
-  alpha:
-    subresources:
-      inference:
-        methods:
-          rerank: post /v1alpha/inference/rerank
-
-      post_training:
-        models:
-          algorithm_config: AlgorithmConfig
-          post_training_job: PostTrainingJob
-          list_post_training_jobs_response: ListPostTrainingJobsResponse
-        methods:
-          preference_optimize: post /v1alpha/post-training/preference-optimize
-          supervised_fine_tune: post /v1alpha/post-training/supervised-fine-tune
-        subresources:
-          job:
-            methods:
-              artifacts: get /v1alpha/post-training/job/artifacts
-              cancel: post /v1alpha/post-training/job/cancel
-              status: get /v1alpha/post-training/job/status
-              list:
-                endpoint: get /v1alpha/post-training/jobs
-                paginated: false
-
-      eval:
-        methods:
-          evaluate_rows: post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations
-          run_eval: post /v1alpha/eval/benchmarks/{benchmark_id}/jobs
-          evaluate_rows_alpha: post /v1alpha/eval/benchmarks/{benchmark_id}/evaluations
-          run_eval_alpha: post /v1alpha/eval/benchmarks/{benchmark_id}/jobs
-
-        subresources:
-          jobs:
-            methods:
-              cancel: delete /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}
-              status: get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}
-              retrieve: get /v1alpha/eval/benchmarks/{benchmark_id}/jobs/{job_id}/result
-        models:
-          evaluate_response: EvaluateResponse
-          benchmark_config: BenchmarkConfig
-          job: Job
-
-      agents:
-        methods:
-          create: post /v1alpha/agents
-          list: get /v1alpha/agents
-          retrieve: get /v1alpha/agents/{agent_id}
-          delete: delete /v1alpha/agents/{agent_id}
-        models:
-          inference_step: InferenceStep
-          tool_execution_step: ToolExecutionStep
-          tool_response: ToolResponse
-          shield_call_step: ShieldCallStep
-          memory_retrieval_step: MemoryRetrievalStep
-        subresources:
-          session:
-            models:
-              session: Session
-            methods:
-              list: get /v1alpha/agents/{agent_id}/sessions
-              create: post /v1alpha/agents/{agent_id}/session
-              delete: delete /v1alpha/agents/{agent_id}/session/{session_id}
-              retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}
-          steps:
-            methods:
-              retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/step/{step_id}
-          turn:
-            models:
-              turn: Turn
-              turn_response_event: AgentTurnResponseEvent
-              agent_turn_response_stream_chunk: AgentTurnResponseStreamChunk
-            methods:
-              create:
-                type: http
-                endpoint: post /v1alpha/agents/{agent_id}/session/{session_id}/turn
-                streaming:
-                  stream_event_model: alpha.agents.turn.agent_turn_response_stream_chunk
-                  param_discriminator: stream
-              retrieve: get /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}
-              resume:
-                type: http
-                endpoint: post /v1alpha/agents/{agent_id}/session/{session_id}/turn/{turn_id}/resume
-                streaming:
-                  stream_event_model: alpha.agents.turn.agent_turn_response_stream_chunk
-                  param_discriminator: stream
-
-  beta:
-    subresources:
-      datasets:
-        models:
-          list_datasets_response: ListDatasetsResponse
-        methods:
-          register: post /v1beta/datasets
-          retrieve: get /v1beta/datasets/{dataset_id}
-          list:
-            endpoint: get /v1beta/datasets
-            paginated: false
-          unregister: delete /v1beta/datasets/{dataset_id}
-          iterrows: get /v1beta/datasetio/iterrows/{dataset_id}
-          appendrows: post /v1beta/datasetio/append-rows/{dataset_id}
-
-settings:
-  license: MIT
-  unwrap_response_fields: [ data ]
-
-openapi:
-  transformations:
-    - command: renameValue
-      reason: pydantic reserved name
-      args:
-        filter:
-          only:
-            - '$.components.schemas.InferenceStep.properties.model_response'
-        rename:
-          python:
-            property_name: 'inference_model_response'
-
-    # - command: renameValue
-    #   reason: pydantic reserved name
-    #   args:
-    #     filter:
-    #       only:
-    #         - '$.components.schemas.Model.properties.model_type'
-    #     rename:
-    #       python:
-    #         property_name: 'type'
-    - command: mergeObject
-      reason: Better return_type using enum
-      args:
-        target:
-          - '$.components.schemas'
-        object:
-          ReturnType:
-            additionalProperties: false
-            properties:
-              type:
-                enum:
-                  - string
-                  - number
-                  - boolean
-                  - array
-                  - object
-                  - json
-                  - union
-                  - chat_completion_input
-                  - completion_input
-                  - agent_turn_input
-            required:
-              - type
-            type: object
-    - command: replaceProperties
-      reason: Replace return type properties with better model (see above)
-      args:
-        filter:
-          only:
-            - '$.components.schemas.ScoringFn.properties.return_type'
-            - '$.components.schemas.RegisterScoringFunctionRequest.properties.return_type'
-        value:
-          $ref: '#/components/schemas/ReturnType'
-    - command: oneOfToAnyOf
-      reason: Prism (mock server) doesn't like one of our requests as it technically matches multiple variants
-    - reason: For better names
-      command: extractToRefs
-      args:
-        ref:
-          target: '$.components.schemas.ToolCallDelta.properties.tool_call'
-          name: '#/components/schemas/ToolCallOrString'
-
-# `readme` is used to configure the code snippets that will be rendered in the
-# README.md of various SDKs. In particular, you can change the `headline`
-# snippet's endpoint and the arguments to call it with.
-readme:
-  example_requests:
-    default:
-      type: request
-      endpoint: post /v1/chat/completions
-      params: &ref_0 {}
-    headline:
-      type: request
-      endpoint: post /v1/models
-      params: *ref_0
-    pagination:
-      type: request
-      endpoint: post /v1/chat/completions
-      params: {}
@@ -15,6 +15,141 @@ info:
 servers:
   - url: http://any-hosted-llama-stack.com
 paths:
+  /v1/batches:
+    get:
+      responses:
+        '200':
+          description: A list of batch objects.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListBatchesResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Batches
+      summary: List all batches for the current user.
+      description: List all batches for the current user.
+      parameters:
+        - name: after
+          in: query
+          description: >-
+            A cursor for pagination; returns batches after this batch ID.
+          required: false
+          schema:
+            type: string
+        - name: limit
+          in: query
+          description: >-
+            Number of batches to return (default 20, max 100).
+          required: true
+          schema:
+            type: integer
+      deprecated: false
+    post:
+      responses:
+        '200':
+          description: The created batch object.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Batch'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Batches
+      summary: >-
+        Create a new batch for processing multiple API requests.
+      description: >-
+        Create a new batch for processing multiple API requests.
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/CreateBatchRequest'
+        required: true
+      deprecated: false
+  /v1/batches/{batch_id}:
+    get:
+      responses:
+        '200':
+          description: The batch object.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Batch'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Batches
+      summary: >-
+        Retrieve information about a specific batch.
+      description: >-
+        Retrieve information about a specific batch.
+      parameters:
+        - name: batch_id
+          in: path
+          description: The ID of the batch to retrieve.
+          required: true
+          schema:
+            type: string
+      deprecated: false
+  /v1/batches/{batch_id}/cancel:
+    post:
+      responses:
+        '200':
+          description: The updated batch object.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Batch'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Batches
+      summary: Cancel a batch that is in progress.
+      description: Cancel a batch that is in progress.
+      parameters:
+        - name: batch_id
+          in: path
+          description: The ID of the batch to cancel.
+          required: true
+          schema:
+            type: string
+      deprecated: false
   /v1/chat/completions:
     get:
       responses:
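To make the new surface concrete, a minimal sketch of creating and listing batches over these endpoints; the base URL, API key handling, and file ID are assumptions for illustration, not part of the diff:

```python
import httpx

client = httpx.Client(base_url="http://localhost:8321")  # assumed local llama-stack server

# Create a batch from a previously uploaded input file (file ID is hypothetical).
created = client.post(
    "/v1/batches",
    json={
        "input_file_id": "file-abc123",
        "endpoint": "/v1/chat/completions",
        "completion_window": "24h",           # the only value the schema allows
        "metadata": {"project": "demo"},
        "idempotency_key": "batch-demo-001",  # extension: idempotent creation
    },
).json()
print(created["id"], created["status"])

# Page through batches with the documented cursor parameters.
page = client.get("/v1/batches", params={"limit": 20}).json()
for batch in page["data"]:
    print(batch["id"], batch["status"])
```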
@@ -4212,6 +4347,331 @@ components:
       title: Error
       description: >-
         Error response from the API. Roughly follows RFC 7807.
+    ListBatchesResponse:
+      type: object
+      properties:
+        object:
+          type: string
+          const: list
+          default: list
+        data:
+          type: array
+          items:
+            type: object
+            properties:
+              id:
+                type: string
+              completion_window:
+                type: string
+              created_at:
+                type: integer
+              endpoint:
+                type: string
+              input_file_id:
+                type: string
+              object:
+                type: string
+                const: batch
+              status:
+                type: string
+                enum:
+                  - validating
+                  - failed
+                  - in_progress
+                  - finalizing
+                  - completed
+                  - expired
+                  - cancelling
+                  - cancelled
+              cancelled_at:
+                type: integer
+              cancelling_at:
+                type: integer
+              completed_at:
+                type: integer
+              error_file_id:
+                type: string
+              errors:
+                type: object
+                properties:
+                  data:
+                    type: array
+                    items:
+                      type: object
+                      properties:
+                        code:
+                          type: string
+                        line:
+                          type: integer
+                        message:
+                          type: string
+                        param:
+                          type: string
+                      additionalProperties: false
+                      title: BatchError
+                  object:
+                    type: string
+                additionalProperties: false
+                title: Errors
+              expired_at:
+                type: integer
+              expires_at:
+                type: integer
+              failed_at:
+                type: integer
+              finalizing_at:
+                type: integer
+              in_progress_at:
+                type: integer
+              metadata:
+                type: object
+                additionalProperties:
+                  type: string
+              model:
+                type: string
+              output_file_id:
+                type: string
+              request_counts:
+                type: object
+                properties:
+                  completed:
+                    type: integer
+                  failed:
+                    type: integer
+                  total:
+                    type: integer
+                additionalProperties: false
+                required:
+                  - completed
+                  - failed
+                  - total
+                title: BatchRequestCounts
+              usage:
+                type: object
+                properties:
+                  input_tokens:
+                    type: integer
+                  input_tokens_details:
+                    type: object
+                    properties:
+                      cached_tokens:
+                        type: integer
+                    additionalProperties: false
+                    required:
+                      - cached_tokens
+                    title: InputTokensDetails
+                  output_tokens:
+                    type: integer
+                  output_tokens_details:
+                    type: object
+                    properties:
+                      reasoning_tokens:
+                        type: integer
+                    additionalProperties: false
+                    required:
+                      - reasoning_tokens
+                    title: OutputTokensDetails
+                  total_tokens:
+                    type: integer
+                additionalProperties: false
+                required:
+                  - input_tokens
+                  - input_tokens_details
+                  - output_tokens
+                  - output_tokens_details
+                  - total_tokens
+                title: BatchUsage
+            additionalProperties: false
+            required:
+              - id
+              - completion_window
+              - created_at
+              - endpoint
+              - input_file_id
+              - object
+              - status
+            title: Batch
+        first_id:
+          type: string
+        last_id:
+          type: string
+        has_more:
+          type: boolean
+          default: false
+      additionalProperties: false
+      required:
+        - object
+        - data
+        - has_more
+      title: ListBatchesResponse
+      description: >-
+        Response containing a list of batch objects.
+    CreateBatchRequest:
+      type: object
+      properties:
+        input_file_id:
+          type: string
+          description: >-
+            The ID of an uploaded file containing requests for the batch.
+        endpoint:
+          type: string
+          description: >-
+            The endpoint to be used for all requests in the batch.
+        completion_window:
+          type: string
+          const: 24h
+          description: >-
+            The time window within which the batch should be processed.
+        metadata:
+          type: object
+          additionalProperties:
+            type: string
+          description: Optional metadata for the batch.
+        idempotency_key:
+          type: string
+          description: >-
+            Optional idempotency key. When provided, enables idempotent behavior.
+      additionalProperties: false
+      required:
+        - input_file_id
+        - endpoint
+        - completion_window
+      title: CreateBatchRequest
+    Batch:
+      type: object
+      properties:
+        id:
+          type: string
+        completion_window:
+          type: string
+        created_at:
+          type: integer
+        endpoint:
+          type: string
+        input_file_id:
+          type: string
+        object:
+          type: string
+          const: batch
+        status:
+          type: string
+          enum:
+            - validating
+            - failed
+            - in_progress
+            - finalizing
+            - completed
+            - expired
+            - cancelling
+            - cancelled
+        cancelled_at:
+          type: integer
+        cancelling_at:
+          type: integer
+        completed_at:
+          type: integer
+        error_file_id:
+          type: string
+        errors:
+          type: object
+          properties:
+            data:
+              type: array
+              items:
+                type: object
+                properties:
+                  code:
+                    type: string
+                  line:
+                    type: integer
+                  message:
+                    type: string
+                  param:
+                    type: string
+                additionalProperties: false
+                title: BatchError
+            object:
+              type: string
+          additionalProperties: false
+          title: Errors
+        expired_at:
+          type: integer
+        expires_at:
+          type: integer
+        failed_at:
+          type: integer
+        finalizing_at:
+          type: integer
+        in_progress_at:
+          type: integer
+        metadata:
+          type: object
+          additionalProperties:
+            type: string
+        model:
+          type: string
+        output_file_id:
+          type: string
+        request_counts:
+          type: object
+          properties:
+            completed:
+              type: integer
+            failed:
+              type: integer
+            total:
+              type: integer
+          additionalProperties: false
+          required:
+            - completed
+            - failed
+            - total
+          title: BatchRequestCounts
+        usage:
+          type: object
+          properties:
+            input_tokens:
+              type: integer
+            input_tokens_details:
+              type: object
+              properties:
+                cached_tokens:
+                  type: integer
+              additionalProperties: false
+              required:
+                - cached_tokens
+              title: InputTokensDetails
+            output_tokens:
+              type: integer
+            output_tokens_details:
+              type: object
+              properties:
+                reasoning_tokens:
+                  type: integer
+              additionalProperties: false
+              required:
+                - reasoning_tokens
+              title: OutputTokensDetails
+            total_tokens:
+              type: integer
+          additionalProperties: false
+          required:
+            - input_tokens
+            - input_tokens_details
+            - output_tokens
+            - output_tokens_details
+            - total_tokens
+          title: BatchUsage
+      additionalProperties: false
+      required:
+        - id
+        - completion_window
+        - created_at
+        - endpoint
+        - input_file_id
+        - object
+        - status
+      title: Batch
     Order:
       type: string
       enum:
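The `Batch` schema above drives a simple polling loop. A hedged sketch (server URL and batch ID are hypothetical) that waits on the documented `status` enum and then reads `request_counts`:

```python
import time

import httpx

client = httpx.Client(base_url="http://localhost:8321")  # assumed server
batch_id = "batch_123"  # hypothetical

# Terminal states from the status enum in the schema above.
TERMINAL = {"completed", "failed", "expired", "cancelled"}

while True:
    batch = client.get(f"/v1/batches/{batch_id}").json()
    if batch["status"] in TERMINAL:
        break
    time.sleep(5)

counts = batch.get("request_counts", {})
print(batch["status"], counts.get("completed"), counts.get("failed"), counts.get("total"))
```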
@@ -5474,11 +5934,44 @@ components:
       oneOf:
         - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
         - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage'
+        - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile'
       discriminator:
         propertyName: type
         mapping:
           input_text: '#/components/schemas/OpenAIResponseInputMessageContentText'
           input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage'
+          input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile'
+    OpenAIResponseInputMessageContentFile:
+      type: object
+      properties:
+        type:
+          type: string
+          const: input_file
+          default: input_file
+          description: >-
+            The type of the input item. Always `input_file`.
+        file_data:
+          type: string
+          description: >-
+            The data of the file to be sent to the model.
+        file_id:
+          type: string
+          description: >-
+            (Optional) The ID of the file to be sent to the model.
+        file_url:
+          type: string
+          description: >-
+            The URL of the file to be sent to the model.
+        filename:
+          type: string
+          description: >-
+            The name of the file to be sent to the model.
+      additionalProperties: false
+      required:
+        - type
+      title: OpenAIResponseInputMessageContentFile
+      description: >-
+        File content for input messages in OpenAI response format.
     OpenAIResponseInputMessageContentImage:
       type: object
       properties:

@@ -5499,6 +5992,10 @@ components:
           default: input_image
           description: >-
             Content type identifier, always "input_image"
+        file_id:
+          type: string
+          description: >-
+            (Optional) The ID of the file to be sent to the model.
         image_url:
           type: string
           description: (Optional) URL of the image content
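With `input_file` added to the content union (and `file_id` to image content), an input message can now reference uploaded files. A hedged sketch of a request body using the new item type; the server URL, request field names outside the content items, model name, and file ID are illustrative assumptions:

```python
import httpx

client = httpx.Client(base_url="http://localhost:8321")  # assumed server

response = client.post(
    "/v1/responses",
    json={
        "model": "llama3.2:3b",  # illustrative model ID
        "input": [                # assumed request field, mirroring the OpenAI shape
            {
                "role": "user",
                "content": [
                    {"type": "input_text", "text": "Summarize the attached report."},
                    {
                        "type": "input_file",
                        "file_id": "file-abc123",  # hypothetical uploaded file
                        "filename": "report.pdf",
                    },
                ],
            }
        ],
    },
).json()
print(response["id"])
```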
@@ -6735,14 +7232,9 @@ components:
         Error details for failed OpenAI response requests.
     OpenAIResponseInput:
       oneOf:
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+        - $ref: '#/components/schemas/OpenAIResponseOutput'
         - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
-        - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
         - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
         - $ref: '#/components/schemas/OpenAIResponseMessage'
     OpenAIResponseInputToolFileSearch:
       type: object
@@ -6898,6 +7390,10 @@ components:
           type: string
           description: >-
             (Optional) ID of the previous response in a conversation
+        prompt:
+          $ref: '#/components/schemas/OpenAIResponsePrompt'
+          description: >-
+            (Optional) Reference to a prompt template and its variables.
         status:
           type: string
           description: >-

@@ -6971,6 +7467,30 @@ components:
           mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
           mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
           mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+    OpenAIResponsePrompt:
+      type: object
+      properties:
+        id:
+          type: string
+          description: Unique identifier of the prompt template
+        variables:
+          type: object
+          additionalProperties:
+            $ref: '#/components/schemas/OpenAIResponseInputMessageContent'
+          description: >-
+            Dictionary of variable names to OpenAIResponseInputMessageContent structure
+            for template substitution. The substitution values can either be strings,
+            or other Response input types like images or files.
+        version:
+          type: string
+          description: >-
+            Version number of the prompt to use (defaults to latest if not specified)
+      additionalProperties: false
+      required:
+        - id
+      title: OpenAIResponsePrompt
+      description: >-
+        OpenAI compatible Prompt object that is used in OpenAI responses.
     OpenAIResponseText:
       type: object
       properties:

@@ -7228,6 +7748,10 @@ components:
         model:
           type: string
           description: The underlying LLM used for completions.
+        prompt:
+          $ref: '#/components/schemas/OpenAIResponsePrompt'
+          description: >-
+            (Optional) Prompt object with ID, version, and variables.
         instructions:
           type: string
         previous_response_id:

@@ -7305,6 +7829,10 @@ components:
           type: string
           description: >-
             (Optional) ID of the previous response in a conversation
+        prompt:
+          $ref: '#/components/schemas/OpenAIResponsePrompt'
+          description: >-
+            (Optional) Reference to a prompt template and its variables.
         status:
           type: string
           description: >-
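The new `OpenAIResponsePrompt` object lets a response reference a stored prompt template by `id`, with optional `version` and `variables` (values may be strings or other input content items). A hedged sketch; the server URL, prompt ID, model ID, and variable names are illustrative assumptions:

```python
import httpx

client = httpx.Client(base_url="http://localhost:8321")  # assumed server

response = client.post(
    "/v1/responses",
    json={
        "model": "llama3.2:3b",            # illustrative model ID
        "prompt": {
            "id": "prompt-weekly-report",  # hypothetical stored prompt template
            "version": "2",                # defaults to latest when omitted
            "variables": {
                # Substitution value as an input content item, per the schema.
                "customer_name": {"type": "input_text", "text": "Acme"},
            },
        },
    },
).json()
print(response.get("prompt"))
```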
@@ -9867,7 +10395,7 @@ components:
             $ref: '#/components/schemas/RAGDocument'
           description: >-
             List of documents to index in the RAG system
-        vector_db_id:
+        vector_store_id:
           type: string
           description: >-
             ID of the vector database to store the document embeddings

@@ -9878,7 +10406,7 @@ components:
       additionalProperties: false
       required:
         - documents
-        - vector_db_id
+        - vector_store_id
         - chunk_size_in_tokens
       title: InsertRequest
     DefaultRAGQueryGeneratorConfig:

@@ -10049,7 +10577,7 @@ components:
           $ref: '#/components/schemas/InterleavedContent'
           description: >-
             The query content to search for in the indexed documents
-        vector_db_ids:
+        vector_store_ids:
           type: array
           items:
             type: string

@@ -10062,7 +10590,7 @@ components:
       additionalProperties: false
       required:
         - content
-        - vector_db_ids
+        - vector_store_ids
       title: QueryRequest
     RAGQueryResult:
       type: object
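The RAG tool requests now take `vector_store_id` / `vector_store_ids` in place of the old `vector_db_id` / `vector_db_ids`. A hedged sketch of the renamed bodies against the documented endpoints; the server URL, store ID, and document fields are illustrative assumptions:

```python
import httpx

client = httpx.Client(base_url="http://localhost:8321")  # assumed server

# Index a document (InsertRequest now keys the target store as vector_store_id).
client.post(
    "/v1/tool-runtime/rag-tool/insert",
    json={
        "documents": [
            {
                "document_id": "doc-1",  # illustrative RAGDocument fields
                "content": "Llama Stack ships a batches API.",
                "metadata": {},
            }
        ],
        "vector_store_id": "vs_demo",    # renamed from vector_db_id
        "chunk_size_in_tokens": 512,
    },
)

# Query across stores (QueryRequest now uses vector_store_ids).
result = client.post(
    "/v1/tool-runtime/rag-tool/query",
    json={
        "content": "What API did Llama Stack ship?",
        "vector_store_ids": ["vs_demo"],  # renamed from vector_db_ids
    },
).json()
print(result)
```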
@@ -10190,6 +10718,10 @@ components:
           description: >-
             The content of the chunk, which can be interleaved text, images, or other
             types.
+        chunk_id:
+          type: string
+          description: >-
+            Unique identifier for the chunk. Must be provided explicitly.
         metadata:
           type: object
           additionalProperties:

@@ -10210,10 +10742,6 @@ components:
           description: >-
             Optional embedding for the chunk. If not provided, it will be computed
             later.
-        stored_chunk_id:
-          type: string
-          description: >-
-            The chunk ID that is stored in the vector database. Used for backend functionality.
         chunk_metadata:
           $ref: '#/components/schemas/ChunkMetadata'
           description: >-

@@ -10222,6 +10750,7 @@ components:
       additionalProperties: false
       required:
         - content
+        - chunk_id
         - metadata
       title: Chunk
       description: >-
@@ -10286,7 +10815,7 @@ components:
    InsertChunksRequest:
      type: object
      properties:
-        vector_db_id:
+        vector_store_id:
          type: string
          description: >-
            The identifier of the vector database to insert the chunks into.
@@ -10305,13 +10834,13 @@ components:
          description: The time to live of the chunks.
      additionalProperties: false
      required:
-        - vector_db_id
+        - vector_store_id
        - chunks
      title: InsertChunksRequest
    QueryChunksRequest:
      type: object
      properties:
-        vector_db_id:
+        vector_store_id:
          type: string
          description: >-
            The identifier of the vector database to query.
@@ -10331,7 +10860,7 @@ components:
          description: The parameters of the query.
      additionalProperties: false
      required:
-        - vector_db_id
+        - vector_store_id
        - query
      title: QueryChunksRequest
    QueryChunksResponse:
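With these changes, every chunk must carry an explicit `chunk_id` (the server no longer derives a `stored_chunk_id`), and the request field is `vector_store_id` rather than `vector_db_id`. A minimal sketch of an `InsertChunksRequest` body under the renamed schema, built as a plain dict that mirrors the fields above; the store ID, text, and TTL field name are illustrative assumptions, not taken verbatim from this diff:

```python
# Sketch of an InsertChunksRequest payload after the rename. The chunk_id is
# now required and supplied by the caller rather than computed server-side.
import json
import uuid

request = {
    "vector_store_id": "my-store",          # was vector_db_id before this change
    "chunks": [
        {
            "content": "Llama Stack exposes an OpenAI-compatible API surface.",
            "chunk_id": str(uuid.uuid4()),  # must be provided explicitly
            "metadata": {"source": "docs"},
        }
    ],
    "ttl_seconds": 3600,  # assumed field name for "the time to live of the chunks"
}
print(json.dumps(request, indent=2))
```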
@@ -11600,7 +12129,6 @@ components:
          description: The sampling strategy.
        max_tokens:
          type: integer
-          default: 0
          description: >-
            The maximum number of tokens that can be generated in the completion.
            The token count of your prompt plus max_tokens cannot exceed the model's
@@ -11850,7 +12378,7 @@ components:
          description: Type of the step in an agent turn.
          const: memory_retrieval
          default: memory_retrieval
-        vector_db_ids:
+        vector_store_ids:
          type: string
          description: >-
            The IDs of the vector databases to retrieve context from.
@@ -11863,7 +12391,7 @@ components:
        - turn_id
        - step_id
        - step_type
-        - vector_db_ids
+        - vector_store_ids
        - inserted_context
      title: MemoryRetrievalStep
      description: >-
@@ -13460,6 +13988,19 @@ tags:
    description: >-
      APIs for creating and interacting with agentic systems.
    x-displayName: Agents
+  - name: Batches
+    description: >-
+      The API is designed to allow use of openai client libraries for seamless integration.
+
+
+      This API provides the following extensions:
+        - idempotent batch creation
+
+
+      Note: This API is currently under active development and may undergo changes.
+    x-displayName: >-
+      The Batches API enables efficient processing of multiple requests in a single
+      operation, particularly useful for processing large datasets, batch evaluation
+      workflows, and cost-effective inference at scale.
  - name: Benchmarks
    description: ''
  - name: Conversations
@@ -13534,6 +14075,7 @@ x-tagGroups:
  - name: Operations
    tags:
      - Agents
+      - Batches
      - Benchmarks
      - Conversations
      - DatasetIO
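The Batches tag added above is OpenAI-compatible, so the stock `openai` Python client should work against a Llama Stack server. A minimal sketch, assuming a local server on port 8321; the base URL, file name, model-agnostic endpoint choice, and idempotency key are assumptions, not part of this diff:

```python
# Sketch of creating a batch through the new OpenAI-compatible Batches API.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

# Upload a JSONL file of requests, then create a 24h batch against it.
batch_file = client.files.create(file=open("requests.jsonl", "rb"), purpose="batch")
batch = client.batches.create(
    input_file_id=batch_file.id,
    endpoint="/v1/chat/completions",
    completion_window="24h",
    metadata={"job": "nightly-eval"},
    # Idempotent creation is a Llama Stack extension; passing idempotency_key
    # via extra_body is an assumption about how the server accepts it.
    extra_body={"idempotency_key": "nightly-eval-001"},
)
print(batch.id, batch.status)
```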
@@ -58,13 +58,21 @@ storage:
    sql_default:
      type: sql_sqlite
      db_path: ${env.SQLITE_STORE_DIR:=~/.llama/distributions/ollama}/sqlstore.db
-  references:
+  stores:
    metadata:
      backend: kv_default
      namespace: registry
    inference:
      backend: sql_default
      table_name: inference_store
+      max_write_queue_size: 10000
+      num_writers: 4
+    conversations:
+      backend: sql_default
+      table_name: openai_conversations
+    prompts:
+      backend: kv_default
+      namespace: prompts
models:
  - metadata: {}
    model_id: ${env.INFERENCE_MODEL}
@@ -113,13 +113,21 @@ data:
        db: ${env.POSTGRES_DB:=llamastack}
        user: ${env.POSTGRES_USER:=llamastack}
        password: ${env.POSTGRES_PASSWORD:=llamastack}
-    references:
+    stores:
      metadata:
        backend: kv_default
        namespace: registry
      inference:
        backend: sql_default
        table_name: inference_store
+        max_write_queue_size: 10000
+        num_writers: 4
+      conversations:
+        backend: sql_default
+        table_name: openai_conversations
+      prompts:
+        backend: kv_default
+        namespace: prompts
    models:
    - metadata:
        embedding_dimension: 768
@@ -106,6 +106,9 @@ storage:
    conversations:
      table_name: openai_conversations
      backend: sql_default
+    prompts:
+      namespace: prompts
+      backend: kv_default
registered_resources:
  models:
  - metadata:
@@ -79,6 +79,33 @@ docker run \
  --port $LLAMA_STACK_PORT
```

+### Via Docker with Custom Run Configuration
+
+You can also run the Docker container with a custom run configuration file by mounting it into the container:
+
+```bash
+# Set the path to your custom run.yaml file
+CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
+LLAMA_STACK_PORT=8321
+
+docker run \
+  -it \
+  --pull always \
+  --gpus all \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
+  -v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
+  -e RUN_CONFIG_PATH=/app/custom-run.yaml \
+  llamastack/distribution-meta-reference-gpu \
+  --port $LLAMA_STACK_PORT
+```
+
+**Note**: The run configuration must be mounted into the container before it can be used. The `-v` flag mounts your local file into the container, and the `RUN_CONFIG_PATH` environment variable tells the entrypoint script which configuration to use.
+
+Available run configurations for this distribution:
+- `run.yaml`
+- `run-with-safety.yaml`
+
### Via venv

Make sure you have the Llama Stack CLI available.
@@ -127,13 +127,39 @@ docker run \
  -it \
  --pull always \
  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
-  -v ./run.yaml:/root/my-run.yaml \
+  -v ~/.llama:/root/.llama \
  -e NVIDIA_API_KEY=$NVIDIA_API_KEY \
  llamastack/distribution-nvidia \
-  --config /root/my-run.yaml \
  --port $LLAMA_STACK_PORT
```

+### Via Docker with Custom Run Configuration
+
+You can also run the Docker container with a custom run configuration file by mounting it into the container:
+
+```bash
+# Set the path to your custom run.yaml file
+CUSTOM_RUN_CONFIG=/path/to/your/custom-run.yaml
+LLAMA_STACK_PORT=8321
+
+docker run \
+  -it \
+  --pull always \
+  -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+  -v ~/.llama:/root/.llama \
+  -v $CUSTOM_RUN_CONFIG:/app/custom-run.yaml \
+  -e RUN_CONFIG_PATH=/app/custom-run.yaml \
+  -e NVIDIA_API_KEY=$NVIDIA_API_KEY \
+  llamastack/distribution-nvidia \
+  --port $LLAMA_STACK_PORT
+```
+
+**Note**: The run configuration must be mounted into the container before it can be used. The `-v` flag mounts your local file into the container, and the `RUN_CONFIG_PATH` environment variable tells the entrypoint script which configuration to use.
+
+Available run configurations for this distribution:
+- `run.yaml`
+- `run-with-safety.yaml`
+
### Via venv

If you've set up your local development environment, you can also install the distribution dependencies using your local virtual environment.
27
docs/docs/providers/files/remote_openai.mdx
Normal file

@@ -0,0 +1,27 @@
+---
+description: "OpenAI Files API provider for managing files through OpenAI's native file storage service."
+sidebar_label: Remote - Openai
+title: remote::openai
+---
+
+# remote::openai
+
+## Description
+
+OpenAI Files API provider for managing files through OpenAI's native file storage service.
+
+## Configuration
+
+| Field | Type | Required | Default | Description |
+|-------|------|----------|---------|-------------|
+| `api_key` | `<class 'str'>` | No | | OpenAI API key for authentication |
+| `metadata_store` | `<class 'llama_stack.core.storage.datatypes.SqlStoreReference'>` | No | | SQL store configuration for file metadata |
+
+## Sample Configuration
+
+```yaml
+api_key: ${env.OPENAI_API_KEY}
+metadata_store:
+  table_name: openai_files_metadata
+  backend: sql_default
+```
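Since the provider sits behind the OpenAI-compatible Files API, the stock `openai` client should work against it. A minimal sketch, assuming a Llama Stack server on localhost:8321 with this provider configured; the file name and `purpose` value are illustrative:

```python
# Sketch of uploading a file through the new remote::openai files provider.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

# The upload is proxied to OpenAI's file storage; file metadata lands in the
# configured SQL store (table `openai_files_metadata` above).
uploaded = client.files.create(file=open("notes.txt", "rb"), purpose="assistants")
print(uploaded.id, uploaded.filename)
```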
@@ -20,6 +20,7 @@ NVIDIA inference provider for accessing NVIDIA NIM models and AI services.
| `url` | `<class 'str'>` | No | https://integrate.api.nvidia.com | A base url for accessing the NVIDIA NIM |
| `timeout` | `<class 'int'>` | No | 60 | Timeout for the HTTP requests |
| `append_api_version` | `<class 'bool'>` | No | True | When set to false, the API version will not be appended to the base_url. By default, it is true. |
+| `rerank_model_to_url` | `dict[str, str]` | No | `{'nv-rerank-qa-mistral-4b:1': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking', 'nvidia/nv-rerankqa-mistral-4b-v3': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/nv-rerankqa-mistral-4b-v3/reranking', 'nvidia/llama-3.2-nv-rerankqa-1b-v2': 'https://ai.api.nvidia.com/v1/retrieval/nvidia/llama-3_2-nv-rerankqa-1b-v2/reranking'}` | Mapping of rerank model identifiers to their API endpoints. |

## Sample Configuration
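`rerank_model_to_url` maps rerank model identifiers to their NVIDIA endpoints. A sketch of the kind of request the provider presumably issues against one of those endpoints; the query/passages payload shape follows NVIDIA's published NIM reranking schema and is an assumption here, not part of this diff:

```python
# Sketch of a direct call to one of the mapped rerank endpoints.
import os
import requests

url = "https://ai.api.nvidia.com/v1/retrieval/nvidia/reranking"
payload = {
    "model": "nv-rerank-qa-mistral-4b:1",
    "query": {"text": "What is Llama Stack?"},
    "passages": [
        {"text": "Llama Stack standardizes building generative AI applications."},
        {"text": "Reranking orders passages by relevance to a query."},
    ],
}
resp = requests.post(
    url,
    json=payload,
    headers={"Authorization": f"Bearer {os.environ['NVIDIA_API_KEY']}"},
    timeout=60,
)
resp.raise_for_status()
print(resp.json())
```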
@@ -72,14 +72,14 @@ description: |
  Example with hybrid search:
  ```python
  response = await vector_io.query_chunks(
-      vector_db_id="my_db",
+      vector_store_id="my_db",
      query="your query here",
      params={"mode": "hybrid", "max_chunks": 3, "score_threshold": 0.7},
  )

  # Using RRF ranker
  response = await vector_io.query_chunks(
-      vector_db_id="my_db",
+      vector_store_id="my_db",
      query="your query here",
      params={
          "mode": "hybrid",
@@ -91,7 +91,7 @@ description: |

  # Using weighted ranker
  response = await vector_io.query_chunks(
-      vector_db_id="my_db",
+      vector_store_id="my_db",
      query="your query here",
      params={
          "mode": "hybrid",
@@ -105,7 +105,7 @@ description: |
  Example with explicit vector search:
  ```python
  response = await vector_io.query_chunks(
-      vector_db_id="my_db",
+      vector_store_id="my_db",
      query="your query here",
      params={"mode": "vector", "max_chunks": 3, "score_threshold": 0.7},
  )
@@ -114,7 +114,7 @@ description: |
  Example with keyword search:
  ```python
  response = await vector_io.query_chunks(
-      vector_db_id="my_db",
+      vector_store_id="my_db",
      query="your query here",
      params={"mode": "keyword", "max_chunks": 3, "score_threshold": 0.7},
  )
@@ -277,14 +277,14 @@ The SQLite-vec provider supports three search modes:
Example with hybrid search:
```python
response = await vector_io.query_chunks(
-    vector_db_id="my_db",
+    vector_store_id="my_db",
    query="your query here",
    params={"mode": "hybrid", "max_chunks": 3, "score_threshold": 0.7},
)

# Using RRF ranker
response = await vector_io.query_chunks(
-    vector_db_id="my_db",
+    vector_store_id="my_db",
    query="your query here",
    params={
        "mode": "hybrid",
@@ -296,7 +296,7 @@ response = await vector_io.query_chunks(

# Using weighted ranker
response = await vector_io.query_chunks(
-    vector_db_id="my_db",
+    vector_store_id="my_db",
    query="your query here",
    params={
        "mode": "hybrid",
@@ -310,7 +310,7 @@ response = await vector_io.query_chunks(
Example with explicit vector search:
```python
response = await vector_io.query_chunks(
-    vector_db_id="my_db",
+    vector_store_id="my_db",
    query="your query here",
    params={"mode": "vector", "max_chunks": 3, "score_threshold": 0.7},
)
@@ -319,7 +319,7 @@ response = await vector_io.query_chunks(
Example with keyword search:
```python
response = await vector_io.query_chunks(
-    vector_db_id="my_db",
+    vector_store_id="my_db",
    query="your query here",
    params={"mode": "keyword", "max_chunks": 3, "score_threshold": 0.7},
)
1036
docs/notebooks/llamastack_agents_getting_started_examples.ipynb
Normal file
File diff suppressed because it is too large
@@ -242,15 +242,6 @@ const sidebars: SidebarsConfig = {
        'providers/eval/remote_nvidia'
      ],
    },
-    {
-      type: 'category',
-      label: 'Telemetry',
-      collapsed: true,
-      items: [
-        'providers/telemetry/index',
-        'providers/telemetry/inline_meta-reference'
-      ],
-    },
    {
      type: 'category',
      label: 'Batches',
741
docs/static/deprecated-llama-stack-spec.html
vendored
@@ -1414,6 +1414,193 @@
                "deprecated": true
            }
        },
+        "/v1/openai/v1/batches": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "A list of batch objects.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/ListBatchesResponse"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Batches"
+                ],
+                "summary": "List all batches for the current user.",
+                "description": "List all batches for the current user.",
+                "parameters": [
+                    {
+                        "name": "after",
+                        "in": "query",
+                        "description": "A cursor for pagination; returns batches after this batch ID.",
+                        "required": false,
+                        "schema": {
+                            "type": "string"
+                        }
+                    },
+                    {
+                        "name": "limit",
+                        "in": "query",
+                        "description": "Number of batches to return (default 20, max 100).",
+                        "required": true,
+                        "schema": {
+                            "type": "integer"
+                        }
+                    }
+                ],
+                "deprecated": true
+            },
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "The created batch object.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/Batch"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Batches"
+                ],
+                "summary": "Create a new batch for processing multiple API requests.",
+                "description": "Create a new batch for processing multiple API requests.",
+                "parameters": [],
+                "requestBody": {
+                    "content": {
+                        "application/json": {
+                            "schema": {
+                                "$ref": "#/components/schemas/CreateBatchRequest"
+                            }
+                        }
+                    },
+                    "required": true
+                },
+                "deprecated": true
+            }
+        },
+        "/v1/openai/v1/batches/{batch_id}": {
+            "get": {
+                "responses": {
+                    "200": {
+                        "description": "The batch object.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/Batch"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Batches"
+                ],
+                "summary": "Retrieve information about a specific batch.",
+                "description": "Retrieve information about a specific batch.",
+                "parameters": [
+                    {
+                        "name": "batch_id",
+                        "in": "path",
+                        "description": "The ID of the batch to retrieve.",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ],
+                "deprecated": true
+            }
+        },
+        "/v1/openai/v1/batches/{batch_id}/cancel": {
+            "post": {
+                "responses": {
+                    "200": {
+                        "description": "The updated batch object.",
+                        "content": {
+                            "application/json": {
+                                "schema": {
+                                    "$ref": "#/components/schemas/Batch"
+                                }
+                            }
+                        }
+                    },
+                    "400": {
+                        "$ref": "#/components/responses/BadRequest400"
+                    },
+                    "429": {
+                        "$ref": "#/components/responses/TooManyRequests429"
+                    },
+                    "500": {
+                        "$ref": "#/components/responses/InternalServerError500"
+                    },
+                    "default": {
+                        "$ref": "#/components/responses/DefaultError"
+                    }
+                },
+                "tags": [
+                    "Batches"
+                ],
+                "summary": "Cancel a batch that is in progress.",
+                "description": "Cancel a batch that is in progress.",
+                "parameters": [
+                    {
+                        "name": "batch_id",
+                        "in": "path",
+                        "description": "The ID of the batch to cancel.",
+                        "required": true,
+                        "schema": {
+                            "type": "string"
+                        }
+                    }
+                ],
+                "deprecated": true
+            }
+        },
        "/v1/openai/v1/chat/completions": {
            "get": {
                "responses": {
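The remaining batch lifecycle endpoints added above (retrieve, list, cancel) are likewise reachable through the stock `openai` client. A minimal sketch; the base URL and batch ID are assumptions:

```python
# Sketch of polling and cancelling batches via the endpoints added above.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

batch = client.batches.retrieve("batch_123")  # GET /v1/openai/v1/batches/{batch_id}
print(batch.status, batch.request_counts)

for b in client.batches.list(limit=20):       # GET /v1/openai/v1/batches
    print(b.id, b.status)

if batch.status in ("validating", "in_progress"):
    client.batches.cancel(batch.id)           # POST /v1/openai/v1/batches/{batch_id}/cancel
```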
@@ -3901,7 +4088,6 @@
                },
                "max_tokens": {
                    "type": "integer",
-                    "default": 0,
                    "description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
                },
                "repetition_penalty": {
@@ -4391,7 +4577,7 @@
                    "const": "memory_retrieval",
                    "default": "memory_retrieval"
                },
-                "vector_db_ids": {
+                "vector_store_ids": {
                    "type": "string",
                    "description": "The IDs of the vector databases to retrieve context from."
                },
@@ -4405,7 +4591,7 @@
                "turn_id",
                "step_id",
                "step_type",
-                "vector_db_ids",
+                "vector_store_ids",
                "inserted_context"
            ],
            "title": "MemoryRetrievalStep",
@@ -6402,6 +6588,451 @@
            "title": "Job",
            "description": "A job execution instance with status tracking."
        },
+        "ListBatchesResponse": {
+            "type": "object",
+            "properties": {
+                "object": {
+                    "type": "string",
+                    "const": "list",
+                    "default": "list"
+                },
+                "data": {
+                    "type": "array",
+                    "items": {
+                        "type": "object",
+                        "properties": {
+                            "id": {
+                                "type": "string"
+                            },
+                            "completion_window": {
+                                "type": "string"
+                            },
+                            "created_at": {
+                                "type": "integer"
+                            },
+                            "endpoint": {
+                                "type": "string"
+                            },
+                            "input_file_id": {
+                                "type": "string"
+                            },
+                            "object": {
+                                "type": "string",
+                                "const": "batch"
+                            },
+                            "status": {
+                                "type": "string",
+                                "enum": [
+                                    "validating",
+                                    "failed",
+                                    "in_progress",
+                                    "finalizing",
+                                    "completed",
+                                    "expired",
+                                    "cancelling",
+                                    "cancelled"
+                                ]
+                            },
+                            "cancelled_at": {
+                                "type": "integer"
+                            },
+                            "cancelling_at": {
+                                "type": "integer"
+                            },
+                            "completed_at": {
+                                "type": "integer"
+                            },
+                            "error_file_id": {
+                                "type": "string"
+                            },
+                            "errors": {
+                                "type": "object",
+                                "properties": {
+                                    "data": {
+                                        "type": "array",
+                                        "items": {
+                                            "type": "object",
+                                            "properties": {
+                                                "code": {
+                                                    "type": "string"
+                                                },
+                                                "line": {
+                                                    "type": "integer"
+                                                },
+                                                "message": {
+                                                    "type": "string"
+                                                },
+                                                "param": {
+                                                    "type": "string"
+                                                }
+                                            },
+                                            "additionalProperties": false,
+                                            "title": "BatchError"
+                                        }
+                                    },
+                                    "object": {
+                                        "type": "string"
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "title": "Errors"
+                            },
+                            "expired_at": {
+                                "type": "integer"
+                            },
+                            "expires_at": {
+                                "type": "integer"
+                            },
+                            "failed_at": {
+                                "type": "integer"
+                            },
+                            "finalizing_at": {
+                                "type": "integer"
+                            },
+                            "in_progress_at": {
+                                "type": "integer"
+                            },
+                            "metadata": {
+                                "type": "object",
+                                "additionalProperties": {
+                                    "type": "string"
+                                }
+                            },
+                            "model": {
+                                "type": "string"
+                            },
+                            "output_file_id": {
+                                "type": "string"
+                            },
+                            "request_counts": {
+                                "type": "object",
+                                "properties": {
+                                    "completed": {
+                                        "type": "integer"
+                                    },
+                                    "failed": {
+                                        "type": "integer"
+                                    },
+                                    "total": {
+                                        "type": "integer"
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "required": [
+                                    "completed",
+                                    "failed",
+                                    "total"
+                                ],
+                                "title": "BatchRequestCounts"
+                            },
+                            "usage": {
+                                "type": "object",
+                                "properties": {
+                                    "input_tokens": {
+                                        "type": "integer"
+                                    },
+                                    "input_tokens_details": {
+                                        "type": "object",
+                                        "properties": {
+                                            "cached_tokens": {
+                                                "type": "integer"
+                                            }
+                                        },
+                                        "additionalProperties": false,
+                                        "required": [
+                                            "cached_tokens"
+                                        ],
+                                        "title": "InputTokensDetails"
+                                    },
+                                    "output_tokens": {
+                                        "type": "integer"
+                                    },
+                                    "output_tokens_details": {
+                                        "type": "object",
+                                        "properties": {
+                                            "reasoning_tokens": {
+                                                "type": "integer"
+                                            }
+                                        },
+                                        "additionalProperties": false,
+                                        "required": [
+                                            "reasoning_tokens"
+                                        ],
+                                        "title": "OutputTokensDetails"
+                                    },
+                                    "total_tokens": {
+                                        "type": "integer"
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "required": [
+                                    "input_tokens",
+                                    "input_tokens_details",
+                                    "output_tokens",
+                                    "output_tokens_details",
+                                    "total_tokens"
+                                ],
+                                "title": "BatchUsage"
+                            }
+                        },
+                        "additionalProperties": false,
+                        "required": [
+                            "id",
+                            "completion_window",
+                            "created_at",
+                            "endpoint",
+                            "input_file_id",
+                            "object",
+                            "status"
+                        ],
+                        "title": "Batch"
+                    }
+                },
+                "first_id": {
+                    "type": "string"
+                },
+                "last_id": {
+                    "type": "string"
+                },
+                "has_more": {
+                    "type": "boolean",
+                    "default": false
+                }
+            },
+            "additionalProperties": false,
+            "required": [
+                "object",
+                "data",
+                "has_more"
+            ],
+            "title": "ListBatchesResponse",
+            "description": "Response containing a list of batch objects."
+        },
+        "CreateBatchRequest": {
+            "type": "object",
+            "properties": {
+                "input_file_id": {
+                    "type": "string",
+                    "description": "The ID of an uploaded file containing requests for the batch."
+                },
+                "endpoint": {
+                    "type": "string",
+                    "description": "The endpoint to be used for all requests in the batch."
+                },
+                "completion_window": {
+                    "type": "string",
+                    "const": "24h",
+                    "description": "The time window within which the batch should be processed."
+                },
+                "metadata": {
+                    "type": "object",
+                    "additionalProperties": {
+                        "type": "string"
+                    },
+                    "description": "Optional metadata for the batch."
+                },
+                "idempotency_key": {
+                    "type": "string",
+                    "description": "Optional idempotency key. When provided, enables idempotent behavior."
+                }
+            },
+            "additionalProperties": false,
+            "required": [
+                "input_file_id",
+                "endpoint",
+                "completion_window"
+            ],
+            "title": "CreateBatchRequest"
+        },
+        "Batch": {
+            "type": "object",
+            "properties": {
+                "id": {
+                    "type": "string"
+                },
+                "completion_window": {
+                    "type": "string"
+                },
+                "created_at": {
+                    "type": "integer"
+                },
+                "endpoint": {
+                    "type": "string"
+                },
+                "input_file_id": {
+                    "type": "string"
+                },
+                "object": {
+                    "type": "string",
+                    "const": "batch"
+                },
+                "status": {
+                    "type": "string",
+                    "enum": [
+                        "validating",
+                        "failed",
+                        "in_progress",
+                        "finalizing",
+                        "completed",
+                        "expired",
+                        "cancelling",
+                        "cancelled"
+                    ]
+                },
+                "cancelled_at": {
+                    "type": "integer"
+                },
+                "cancelling_at": {
+                    "type": "integer"
+                },
+                "completed_at": {
+                    "type": "integer"
+                },
+                "error_file_id": {
+                    "type": "string"
+                },
+                "errors": {
+                    "type": "object",
+                    "properties": {
+                        "data": {
+                            "type": "array",
+                            "items": {
+                                "type": "object",
+                                "properties": {
+                                    "code": {
+                                        "type": "string"
+                                    },
+                                    "line": {
+                                        "type": "integer"
+                                    },
+                                    "message": {
+                                        "type": "string"
+                                    },
+                                    "param": {
+                                        "type": "string"
+                                    }
+                                },
+                                "additionalProperties": false,
+                                "title": "BatchError"
+                            }
+                        },
+                        "object": {
+                            "type": "string"
+                        }
+                    },
+                    "additionalProperties": false,
+                    "title": "Errors"
+                },
+                "expired_at": {
+                    "type": "integer"
+                },
+                "expires_at": {
+                    "type": "integer"
+                },
+                "failed_at": {
+                    "type": "integer"
+                },
+                "finalizing_at": {
+                    "type": "integer"
+                },
+                "in_progress_at": {
+                    "type": "integer"
+                },
+                "metadata": {
+                    "type": "object",
+                    "additionalProperties": {
+                        "type": "string"
+                    }
+                },
+                "model": {
+                    "type": "string"
+                },
+                "output_file_id": {
+                    "type": "string"
+                },
+                "request_counts": {
+                    "type": "object",
+                    "properties": {
+                        "completed": {
+                            "type": "integer"
+                        },
+                        "failed": {
+                            "type": "integer"
+                        },
+                        "total": {
+                            "type": "integer"
+                        }
+                    },
+                    "additionalProperties": false,
+                    "required": [
+                        "completed",
+                        "failed",
+                        "total"
+                    ],
+                    "title": "BatchRequestCounts"
+                },
+                "usage": {
+                    "type": "object",
+                    "properties": {
+                        "input_tokens": {
+                            "type": "integer"
+                        },
+                        "input_tokens_details": {
+                            "type": "object",
+                            "properties": {
+                                "cached_tokens": {
+                                    "type": "integer"
+                                }
+                            },
+                            "additionalProperties": false,
+                            "required": [
+                                "cached_tokens"
+                            ],
+                            "title": "InputTokensDetails"
+                        },
+                        "output_tokens": {
+                            "type": "integer"
+                        },
+                        "output_tokens_details": {
+                            "type": "object",
+                            "properties": {
+                                "reasoning_tokens": {
+                                    "type": "integer"
+                                }
+                            },
+                            "additionalProperties": false,
+                            "required": [
+                                "reasoning_tokens"
+                            ],
+                            "title": "OutputTokensDetails"
+                        },
+                        "total_tokens": {
+                            "type": "integer"
+                        }
+                    },
+                    "additionalProperties": false,
+                    "required": [
+                        "input_tokens",
+                        "input_tokens_details",
+                        "output_tokens",
+                        "output_tokens_details",
+                        "total_tokens"
+                    ],
+                    "title": "BatchUsage"
+                }
+            },
+            "additionalProperties": false,
+            "required": [
+                "id",
+                "completion_window",
+                "created_at",
+                "endpoint",
+                "input_file_id",
+                "object",
+                "status"
+            ],
+            "title": "Batch"
+        },
        "Order": {
            "type": "string",
            "enum": [
@@ -8527,29 +9158,14 @@
        "OpenAIResponseInput": {
            "oneOf": [
                {
-                    "$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
-                },
-                {
-                    "$ref": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall"
-                },
-                {
-                    "$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall"
+                    "$ref": "#/components/schemas/OpenAIResponseOutput"
                },
                {
                    "$ref": "#/components/schemas/OpenAIResponseInputFunctionToolCallOutput"
                },
-                {
-                    "$ref": "#/components/schemas/OpenAIResponseMCPApprovalRequest"
-                },
                {
                    "$ref": "#/components/schemas/OpenAIResponseMCPApprovalResponse"
                },
-                {
-                    "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPCall"
-                },
-                {
-                    "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools"
-                },
                {
                    "$ref": "#/components/schemas/OpenAIResponseMessage"
                }
@@ -8592,16 +9208,53 @@
                },
                {
                    "$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage"
+                },
+                {
+                    "$ref": "#/components/schemas/OpenAIResponseInputMessageContentFile"
                }
            ],
            "discriminator": {
                "propertyName": "type",
                "mapping": {
                    "input_text": "#/components/schemas/OpenAIResponseInputMessageContentText",
-                    "input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage"
+                    "input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage",
+                    "input_file": "#/components/schemas/OpenAIResponseInputMessageContentFile"
                }
            }
        },
+        "OpenAIResponseInputMessageContentFile": {
+            "type": "object",
+            "properties": {
+                "type": {
+                    "type": "string",
+                    "const": "input_file",
+                    "default": "input_file",
+                    "description": "The type of the input item. Always `input_file`."
+                },
+                "file_data": {
+                    "type": "string",
+                    "description": "The data of the file to be sent to the model."
+                },
+                "file_id": {
+                    "type": "string",
+                    "description": "(Optional) The ID of the file to be sent to the model."
+                },
+                "file_url": {
+                    "type": "string",
+                    "description": "The URL of the file to be sent to the model."
+                },
+                "filename": {
+                    "type": "string",
+                    "description": "The name of the file to be sent to the model."
+                }
+            },
+            "additionalProperties": false,
+            "required": [
+                "type"
+            ],
+            "title": "OpenAIResponseInputMessageContentFile",
+            "description": "File content for input messages in OpenAI response format."
+        },
        "OpenAIResponseInputMessageContentImage": {
            "type": "object",
            "properties": {
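A minimal sketch of the new `input_file` content part in a Responses request, via the stock `openai` client against a Llama Stack server (base URL, model id, and file id are illustrative; `file_data` or `file_url` could be used instead of `file_id` per the schema above):

```python
# Sketch of sending file content in a Responses API request.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

response = client.responses.create(
    model="llama3.2:3b",  # placeholder model id
    input=[
        {
            "role": "user",
            "content": [
                {"type": "input_text", "text": "Summarize the attached file."},
                {"type": "input_file", "file_id": "file-abc123"},  # illustrative id
            ],
        }
    ],
)
print(response.output_text)
```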
@@ -8629,6 +9282,10 @@
                    "default": "input_image",
                    "description": "Content type identifier, always \"input_image\""
                },
+                "file_id": {
+                    "type": "string",
+                    "description": "(Optional) The ID of the file to be sent to the model."
+                },
                "image_url": {
                    "type": "string",
                    "description": "(Optional) URL of the image content"
@@ -8992,6 +9649,10 @@
                    "type": "string",
                    "description": "(Optional) ID of the previous response in a conversation"
                },
+                "prompt": {
+                    "$ref": "#/components/schemas/OpenAIResponsePrompt",
+                    "description": "(Optional) Reference to a prompt template and its variables."
+                },
                "status": {
                    "type": "string",
                    "description": "Current status of the response generation"
@@ -9416,6 +10077,32 @@
            "title": "OpenAIResponseOutputMessageWebSearchToolCall",
            "description": "Web search tool call output message for OpenAI responses."
        },
+        "OpenAIResponsePrompt": {
+            "type": "object",
+            "properties": {
+                "id": {
+                    "type": "string",
+                    "description": "Unique identifier of the prompt template"
+                },
+                "variables": {
+                    "type": "object",
+                    "additionalProperties": {
+                        "$ref": "#/components/schemas/OpenAIResponseInputMessageContent"
+                    },
+                    "description": "Dictionary of variable names to OpenAIResponseInputMessageContent structure for template substitution. The substitution values can either be strings, or other Response input types like images or files."
+                },
+                "version": {
+                    "type": "string",
+                    "description": "Version number of the prompt to use (defaults to latest if not specified)"
+                }
+            },
+            "additionalProperties": false,
+            "required": [
+                "id"
+            ],
+            "title": "OpenAIResponsePrompt",
+            "description": "OpenAI compatible Prompt object that is used in OpenAI responses."
+        },
        "OpenAIResponseText": {
            "type": "object",
            "properties": {
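A sketch of referencing a stored prompt template through the new `prompt` field; the prompt id, version, and variable names are illustrative, and per the schema the variable values can be plain strings or other input content types:

```python
# Sketch of an OpenAIResponsePrompt reference in a Responses request.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

response = client.responses.create(
    model="llama3.2:3b",  # placeholder model id
    prompt={
        "id": "pmpt_support_reply",          # illustrative prompt template id
        "version": "2",                      # defaults to latest if omitted
        "variables": {"customer_name": "Ada"},
    },
)
print(response.output_text)
```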
@@ -9786,6 +10473,10 @@
                    "type": "string",
                    "description": "The underlying LLM used for completions."
                },
+                "prompt": {
+                    "$ref": "#/components/schemas/OpenAIResponsePrompt",
+                    "description": "(Optional) Prompt object with ID, version, and variables."
+                },
                "instructions": {
                    "type": "string"
                },
@@ -9874,6 +10565,10 @@
                    "type": "string",
                    "description": "(Optional) ID of the previous response in a conversation"
                },
+                "prompt": {
+                    "$ref": "#/components/schemas/OpenAIResponsePrompt",
+                    "description": "(Optional) Reference to a prompt template and its variables."
+                },
                "status": {
                    "type": "string",
                    "description": "Current status of the response generation"
@@ -13442,6 +14137,11 @@
            "description": "APIs for creating and interacting with agentic systems.\n\n## Deprecated APIs\n\n> **⚠️ DEPRECATED**: These APIs are provided for migration reference and will be removed in future versions. Not recommended for new projects.\n\n### Migration Guidance\n\nIf you are using deprecated versions of the Agents or Responses APIs, please migrate to:\n\n- **Responses API**: Use the stable v1 Responses API endpoints\n",
            "x-displayName": "Agents"
        },
+        {
+            "name": "Batches",
+            "description": "The API is designed to allow use of openai client libraries for seamless integration.\n\nThis API provides the following extensions:\n - idempotent batch creation\n\nNote: This API is currently under active development and may undergo changes.",
+            "x-displayName": "The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale."
+        },
        {
            "name": "Benchmarks",
            "description": ""
@@ -13492,6 +14192,7 @@
            "name": "Operations",
            "tags": [
                "Agents",
+                "Batches",
                "Benchmarks",
                "DatasetIO",
                "Datasets",
559
docs/static/deprecated-llama-stack-spec.yaml
vendored
@@ -1012,6 +1012,141 @@ paths:
          schema:
            type: string
      deprecated: true
+  /v1/openai/v1/batches:
+    get:
+      responses:
+        '200':
+          description: A list of batch objects.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListBatchesResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Batches
+      summary: List all batches for the current user.
+      description: List all batches for the current user.
+      parameters:
+        - name: after
+          in: query
+          description: >-
+            A cursor for pagination; returns batches after this batch ID.
+          required: false
+          schema:
+            type: string
+        - name: limit
+          in: query
+          description: >-
+            Number of batches to return (default 20, max 100).
+          required: true
+          schema:
+            type: integer
+      deprecated: true
+    post:
+      responses:
+        '200':
+          description: The created batch object.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Batch'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Batches
+      summary: >-
+        Create a new batch for processing multiple API requests.
+      description: >-
+        Create a new batch for processing multiple API requests.
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/CreateBatchRequest'
+        required: true
+      deprecated: true
+  /v1/openai/v1/batches/{batch_id}:
+    get:
+      responses:
+        '200':
+          description: The batch object.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Batch'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Batches
+      summary: >-
+        Retrieve information about a specific batch.
+      description: >-
+        Retrieve information about a specific batch.
+      parameters:
+        - name: batch_id
+          in: path
+          description: The ID of the batch to retrieve.
+          required: true
+          schema:
+            type: string
+      deprecated: true
+  /v1/openai/v1/batches/{batch_id}/cancel:
+    post:
+      responses:
+        '200':
+          description: The updated batch object.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Batch'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Batches
+      summary: Cancel a batch that is in progress.
+      description: Cancel a batch that is in progress.
+      parameters:
+        - name: batch_id
+          in: path
+          description: The ID of the batch to cancel.
+          required: true
+          schema:
+            type: string
+      deprecated: true
  /v1/openai/v1/chat/completions:
    get:
      responses:
@@ -2862,7 +2997,6 @@ components:
          description: The sampling strategy.
        max_tokens:
          type: integer
-          default: 0
          description: >-
            The maximum number of tokens that can be generated in the completion.
            The token count of your prompt plus max_tokens cannot exceed the model's
@@ -3253,7 +3387,7 @@ components:
          description: Type of the step in an agent turn.
          const: memory_retrieval
          default: memory_retrieval
-        vector_db_ids:
+        vector_store_ids:
          type: string
          description: >-
            The IDs of the vector databases to retrieve context from.
@@ -3266,7 +3400,7 @@ components:
        - turn_id
        - step_id
        - step_type
-        - vector_db_ids
+        - vector_store_ids
        - inserted_context
      title: MemoryRetrievalStep
      description: >-
@@ -4737,6 +4871,331 @@ components:
      title: Job
      description: >-
        A job execution instance with status tracking.
+    ListBatchesResponse:
+      type: object
+      properties:
+        object:
+          type: string
+          const: list
+          default: list
+        data:
+          type: array
+          items:
+            type: object
+            properties:
+              id:
+                type: string
+              completion_window:
+                type: string
+              created_at:
+                type: integer
+              endpoint:
+                type: string
+              input_file_id:
+                type: string
+              object:
+                type: string
+                const: batch
+              status:
+                type: string
+                enum:
+                  - validating
+                  - failed
+                  - in_progress
+                  - finalizing
+                  - completed
+                  - expired
+                  - cancelling
+                  - cancelled
+              cancelled_at:
+                type: integer
+              cancelling_at:
+                type: integer
+              completed_at:
+                type: integer
+              error_file_id:
+                type: string
+              errors:
+                type: object
+                properties:
+                  data:
+                    type: array
+                    items:
+                      type: object
+                      properties:
+                        code:
+                          type: string
+                        line:
+                          type: integer
+                        message:
+                          type: string
+                        param:
+                          type: string
+                      additionalProperties: false
+                      title: BatchError
+                  object:
+                    type: string
+                additionalProperties: false
+                title: Errors
+              expired_at:
+                type: integer
+              expires_at:
+                type: integer
+              failed_at:
+                type: integer
+              finalizing_at:
+                type: integer
+              in_progress_at:
+                type: integer
+              metadata:
+                type: object
+                additionalProperties:
+                  type: string
+              model:
+                type: string
+              output_file_id:
+                type: string
+              request_counts:
+                type: object
+                properties:
+                  completed:
+                    type: integer
+                  failed:
+                    type: integer
+                  total:
+                    type: integer
+                additionalProperties: false
+                required:
+                  - completed
+                  - failed
+                  - total
+                title: BatchRequestCounts
+              usage:
+                type: object
+                properties:
+                  input_tokens:
+                    type: integer
+                  input_tokens_details:
+                    type: object
+                    properties:
+                      cached_tokens:
+                        type: integer
+                    additionalProperties: false
+                    required:
+                      - cached_tokens
+                    title: InputTokensDetails
+                  output_tokens:
|
||||||
|
type: integer
|
||||||
|
output_tokens_details:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
reasoning_tokens:
|
||||||
|
type: integer
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- reasoning_tokens
|
||||||
|
title: OutputTokensDetails
|
||||||
|
total_tokens:
|
||||||
|
type: integer
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- input_tokens
|
||||||
|
- input_tokens_details
|
||||||
|
- output_tokens
|
||||||
|
- output_tokens_details
|
||||||
|
- total_tokens
|
||||||
|
title: BatchUsage
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- id
|
||||||
|
- completion_window
|
||||||
|
- created_at
|
||||||
|
- endpoint
|
||||||
|
- input_file_id
|
||||||
|
- object
|
||||||
|
- status
|
||||||
|
title: Batch
|
||||||
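
The `CreateBatchRequest` and `Batch` schemas added above follow the OpenAI Batches shape, so a standard OpenAI client can exercise them. A minimal sketch, assuming a Llama Stack server at localhost:8321 and an already-uploaded .jsonl requests file (the file ID and key values below are placeholders); `idempotency_key` is a Llama Stack extension, so it rides in `extra_body`:

# Sketch only: create a batch against a Llama Stack server.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

batch = client.batches.create(
    input_file_id="file-abc123",           # placeholder: ID of an uploaded requests file
    endpoint="/v1/chat/completions",       # endpoint used for all requests in the batch
    completion_window="24h",               # the only value the schema allows (const: 24h)
    metadata={"project": "nightly-eval"},  # optional string-to-string metadata
    extra_body={"idempotency_key": "eval-2025-01-15"},  # Llama Stack extension
)
print(batch.id, batch.status)  # a new batch starts in the "validating" status
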
    Order:
      type: string
      enum:

@@ -6370,14 +6829,9 @@ components:
        Error details for failed OpenAI response requests.
    OpenAIResponseInput:
      oneOf:
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
+        - $ref: '#/components/schemas/OpenAIResponseOutput'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
        - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
-        - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
        - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
        - $ref: '#/components/schemas/OpenAIResponseMessage'
    "OpenAIResponseInputFunctionToolCallOutput":
      type: object
@@ -6408,11 +6862,44 @@ components:
      oneOf:
        - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
        - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage'
+        - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile'
      discriminator:
        propertyName: type
        mapping:
          input_text: '#/components/schemas/OpenAIResponseInputMessageContentText'
          input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage'
+          input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile'
+    OpenAIResponseInputMessageContentFile:
+      type: object
+      properties:
+        type:
+          type: string
+          const: input_file
+          default: input_file
+          description: >-
+            The type of the input item. Always `input_file`.
+        file_data:
+          type: string
+          description: >-
+            The data of the file to be sent to the model.
+        file_id:
+          type: string
+          description: >-
+            (Optional) The ID of the file to be sent to the model.
+        file_url:
+          type: string
+          description: >-
+            The URL of the file to be sent to the model.
+        filename:
+          type: string
+          description: >-
+            The name of the file to be sent to the model.
+      additionalProperties: false
+      required:
+        - type
+      title: OpenAIResponseInputMessageContentFile
+      description: >-
+        File content for input messages in OpenAI response format.
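
The new `OpenAIResponseInputMessageContentFile` part identifies a file by `file_id`, inline `file_data`, or a `file_url`, with an optional `filename`. A minimal sketch of sending such a part through the Responses API, assuming a Llama Stack server at localhost:8321 (model and file IDs are placeholders):

# Sketch only: attach a previously uploaded file to a Responses request.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

response = client.responses.create(
    model="meta-llama/Llama-3.3-70B-Instruct",  # placeholder model ID
    input=[
        {
            "role": "user",
            "content": [
                {"type": "input_file", "file_id": "file-abc123"},  # placeholder file ID
                {"type": "input_text", "text": "Summarize this document."},
            ],
        }
    ],
)
print(response.output_text)
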
    OpenAIResponseInputMessageContentImage:
      type: object
      properties:

@@ -6433,6 +6920,10 @@ components:
          default: input_image
          description: >-
            Content type identifier, always "input_image"
+        file_id:
+          type: string
+          description: >-
+            (Optional) The ID of the file to be sent to the model.
        image_url:
          type: string
          description: (Optional) URL of the image content

@@ -6703,6 +7194,10 @@ components:
          type: string
          description: >-
            (Optional) ID of the previous response in a conversation
+        prompt:
+          $ref: '#/components/schemas/OpenAIResponsePrompt'
+          description: >-
+            (Optional) Reference to a prompt template and its variables.
        status:
          type: string
          description: >-

@@ -7042,6 +7537,30 @@ components:
        OpenAIResponseOutputMessageWebSearchToolCall
      description: >-
        Web search tool call output message for OpenAI responses.
+    OpenAIResponsePrompt:
+      type: object
+      properties:
+        id:
+          type: string
+          description: Unique identifier of the prompt template
+        variables:
+          type: object
+          additionalProperties:
+            $ref: '#/components/schemas/OpenAIResponseInputMessageContent'
+          description: >-
+            Dictionary of variable names to OpenAIResponseInputMessageContent structure
+            for template substitution. The substitution values can either be strings,
+            or other Response input types like images or files.
+        version:
+          type: string
+          description: >-
+            Version number of the prompt to use (defaults to latest if not specified)
+      additionalProperties: false
+      required:
+        - id
+      title: OpenAIResponsePrompt
+      description: >-
+        OpenAI compatible Prompt object that is used in OpenAI responses.
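
`OpenAIResponsePrompt` lets a response reference a stored prompt template by `id` (optionally pinned to a `version`), with `variables` substituted from plain strings or richer input content parts. A minimal sketch, assuming such a template already exists on the server (IDs and variable names are placeholders):

# Sketch only: create a response from a stored prompt template.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")

response = client.responses.create(
    model="meta-llama/Llama-3.3-70B-Instruct",  # placeholder model ID
    prompt={
        "id": "prompt-123",  # placeholder template ID
        "version": "2",      # optional; latest version if omitted
        "variables": {
            "customer_name": "Alice",  # plain string value
            "contract": {"type": "input_file", "file_id": "file-abc123"},  # file content part
        },
    },
)
print(response.output_text)
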
    OpenAIResponseText:
      type: object
      properties:

@@ -7299,6 +7818,10 @@ components:
        model:
          type: string
          description: The underlying LLM used for completions.
+        prompt:
+          $ref: '#/components/schemas/OpenAIResponsePrompt'
+          description: >-
+            (Optional) Prompt object with ID, version, and variables.
        instructions:
          type: string
        previous_response_id:

@@ -7376,6 +7899,10 @@ components:
          type: string
          description: >-
            (Optional) ID of the previous response in a conversation
+        prompt:
+          $ref: '#/components/schemas/OpenAIResponsePrompt'
+          description: >-
+            (Optional) Reference to a prompt template and its variables.
        status:
          type: string
          description: >-

@@ -10196,6 +10723,19 @@ tags:
      - **Responses API**: Use the stable v1 Responses API endpoints
    x-displayName: Agents
+  - name: Batches
+    description: >-
+      The API is designed to allow use of openai client libraries for seamless integration.
+
+      This API provides the following extensions:
+        - idempotent batch creation
+
+      Note: This API is currently under active development and may undergo changes.
+    x-displayName: >-
+      The Batches API enables efficient processing of multiple requests in a single
+      operation, particularly useful for processing large datasets, batch evaluation
+      workflows, and cost-effective inference at scale.
  - name: Benchmarks
    description: ''
  - name: DatasetIO

@@ -10241,6 +10781,7 @@ x-tagGroups:
  - name: Operations
    tags:
      - Agents
+      - Batches
      - Benchmarks
      - DatasetIO
      - Datasets
@@ -2376,7 +2376,6 @@
      },
      "max_tokens": {
        "type": "integer",
-        "default": 0,
        "description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
      },
      "repetition_penalty": {

@@ -2866,7 +2865,7 @@
        "const": "memory_retrieval",
        "default": "memory_retrieval"
      },
-      "vector_db_ids": {
+      "vector_store_ids": {
        "type": "string",
        "description": "The IDs of the vector databases to retrieve context from."
      },

@@ -2880,7 +2879,7 @@
        "turn_id",
        "step_id",
        "step_type",
-        "vector_db_ids",
+        "vector_store_ids",
        "inserted_context"
      ],
      "title": "MemoryRetrievalStep",

@@ -1695,7 +1695,6 @@ components:
      description: The sampling strategy.
    max_tokens:
      type: integer
-      default: 0
      description: >-
        The maximum number of tokens that can be generated in the completion.
        The token count of your prompt plus max_tokens cannot exceed the model's

@@ -2086,7 +2085,7 @@ components:
      description: Type of the step in an agent turn.
      const: memory_retrieval
      default: memory_retrieval
-    vector_db_ids:
+    vector_store_ids:
      type: string
      description: >-
        The IDs of the vector databases to retrieve context from.

@@ -2099,7 +2098,7 @@ components:
    - turn_id
    - step_id
    - step_type
-    - vector_db_ids
+    - vector_store_ids
    - inserted_context
    title: MemoryRetrievalStep
    description: >-
761 docs/static/llama-stack-spec.html vendored

@@ -40,6 +40,193 @@
    }
  ],
  "paths": {
+    "/v1/batches": {
+      "get": {
+        "responses": {
+          "200": {
+            "description": "A list of batch objects.",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ListBatchesResponse"
+                }
+              }
+            }
+          },
+          "400": {
+            "$ref": "#/components/responses/BadRequest400"
+          },
+          "429": {
+            "$ref": "#/components/responses/TooManyRequests429"
+          },
+          "500": {
+            "$ref": "#/components/responses/InternalServerError500"
+          },
+          "default": {
+            "$ref": "#/components/responses/DefaultError"
+          }
+        },
+        "tags": [
+          "Batches"
+        ],
+        "summary": "List all batches for the current user.",
+        "description": "List all batches for the current user.",
+        "parameters": [
+          {
+            "name": "after",
+            "in": "query",
+            "description": "A cursor for pagination; returns batches after this batch ID.",
+            "required": false,
+            "schema": {
+              "type": "string"
+            }
+          },
+          {
+            "name": "limit",
+            "in": "query",
+            "description": "Number of batches to return (default 20, max 100).",
+            "required": true,
+            "schema": {
+              "type": "integer"
+            }
+          }
+        ],
+        "deprecated": false
+      },
+      "post": {
+        "responses": {
+          "200": {
+            "description": "The created batch object.",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/Batch"
+                }
+              }
+            }
+          },
+          "400": {
+            "$ref": "#/components/responses/BadRequest400"
+          },
+          "429": {
+            "$ref": "#/components/responses/TooManyRequests429"
+          },
+          "500": {
+            "$ref": "#/components/responses/InternalServerError500"
+          },
+          "default": {
+            "$ref": "#/components/responses/DefaultError"
+          }
+        },
+        "tags": [
+          "Batches"
+        ],
+        "summary": "Create a new batch for processing multiple API requests.",
+        "description": "Create a new batch for processing multiple API requests.",
+        "parameters": [],
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "$ref": "#/components/schemas/CreateBatchRequest"
+              }
+            }
+          },
+          "required": true
+        },
+        "deprecated": false
+      }
+    },
+    "/v1/batches/{batch_id}": {
+      "get": {
+        "responses": {
+          "200": {
+            "description": "The batch object.",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/Batch"
+                }
+              }
+            }
+          },
+          "400": {
+            "$ref": "#/components/responses/BadRequest400"
+          },
+          "429": {
+            "$ref": "#/components/responses/TooManyRequests429"
+          },
+          "500": {
+            "$ref": "#/components/responses/InternalServerError500"
+          },
+          "default": {
+            "$ref": "#/components/responses/DefaultError"
+          }
+        },
+        "tags": [
+          "Batches"
+        ],
+        "summary": "Retrieve information about a specific batch.",
+        "description": "Retrieve information about a specific batch.",
+        "parameters": [
+          {
+            "name": "batch_id",
+            "in": "path",
+            "description": "The ID of the batch to retrieve.",
+            "required": true,
+            "schema": {
+              "type": "string"
+            }
+          }
+        ],
+        "deprecated": false
+      }
+    },
+    "/v1/batches/{batch_id}/cancel": {
+      "post": {
+        "responses": {
+          "200": {
+            "description": "The updated batch object.",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/Batch"
+                }
+              }
+            }
+          },
+          "400": {
+            "$ref": "#/components/responses/BadRequest400"
+          },
+          "429": {
+            "$ref": "#/components/responses/TooManyRequests429"
+          },
+          "500": {
+            "$ref": "#/components/responses/InternalServerError500"
+          },
+          "default": {
+            "$ref": "#/components/responses/DefaultError"
+          }
+        },
+        "tags": [
+          "Batches"
+        ],
+        "summary": "Cancel a batch that is in progress.",
+        "description": "Cancel a batch that is in progress.",
+        "parameters": [
+          {
+            "name": "batch_id",
+            "in": "path",
+            "description": "The ID of the batch to cancel.",
+            "required": true,
+            "schema": {
+              "type": "string"
+            }
+          }
+        ],
+        "deprecated": false
+      }
+    },
    "/v1/chat/completions": {
      "get": {
        "responses": {
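
The list endpoint added above pages with an `after` cursor and a `limit`, and the response reports `has_more` and `last_id`. A minimal sketch of manual pagination over the raw endpoint, assuming a Llama Stack server at localhost:8321:

# Sketch only: walk GET /v1/batches using the ListBatchesResponse fields.
import httpx

base = "http://localhost:8321/v1"
params = {"limit": 20}
while True:
    page = httpx.get(f"{base}/batches", params=params).json()
    for batch in page["data"]:
        print(batch["id"], batch["status"])
    if not page.get("has_more"):
        break
    params["after"] = page["last_id"]  # cursor: continue after the last batch seen
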
@@ -4005,6 +4192,451 @@
      "title": "Error",
      "description": "Error response from the API. Roughly follows RFC 7807."
    },
+    "ListBatchesResponse": {
+      "type": "object",
+      "properties": {
+        "object": {
+          "type": "string",
+          "const": "list",
+          "default": "list"
+        },
+        "data": {
+          "type": "array",
+          "items": {
+            "type": "object",
+            "properties": {
+              "id": {
+                "type": "string"
+              },
+              "completion_window": {
+                "type": "string"
+              },
+              "created_at": {
+                "type": "integer"
+              },
+              "endpoint": {
+                "type": "string"
+              },
+              "input_file_id": {
+                "type": "string"
+              },
+              "object": {
+                "type": "string",
+                "const": "batch"
+              },
+              "status": {
+                "type": "string",
+                "enum": [
+                  "validating",
+                  "failed",
+                  "in_progress",
+                  "finalizing",
+                  "completed",
+                  "expired",
+                  "cancelling",
+                  "cancelled"
+                ]
+              },
+              "cancelled_at": {
+                "type": "integer"
+              },
+              "cancelling_at": {
+                "type": "integer"
+              },
+              "completed_at": {
+                "type": "integer"
+              },
+              "error_file_id": {
+                "type": "string"
+              },
+              "errors": {
+                "type": "object",
+                "properties": {
+                  "data": {
+                    "type": "array",
+                    "items": {
+                      "type": "object",
+                      "properties": {
+                        "code": {
+                          "type": "string"
+                        },
+                        "line": {
+                          "type": "integer"
+                        },
+                        "message": {
+                          "type": "string"
+                        },
+                        "param": {
+                          "type": "string"
+                        }
+                      },
+                      "additionalProperties": false,
+                      "title": "BatchError"
+                    }
+                  },
+                  "object": {
+                    "type": "string"
+                  }
+                },
+                "additionalProperties": false,
+                "title": "Errors"
+              },
+              "expired_at": {
+                "type": "integer"
+              },
+              "expires_at": {
+                "type": "integer"
+              },
+              "failed_at": {
+                "type": "integer"
+              },
+              "finalizing_at": {
+                "type": "integer"
+              },
+              "in_progress_at": {
+                "type": "integer"
+              },
+              "metadata": {
+                "type": "object",
+                "additionalProperties": {
+                  "type": "string"
+                }
+              },
+              "model": {
+                "type": "string"
+              },
+              "output_file_id": {
+                "type": "string"
+              },
+              "request_counts": {
+                "type": "object",
+                "properties": {
+                  "completed": {
+                    "type": "integer"
+                  },
+                  "failed": {
+                    "type": "integer"
+                  },
+                  "total": {
+                    "type": "integer"
+                  }
+                },
+                "additionalProperties": false,
+                "required": [
+                  "completed",
+                  "failed",
+                  "total"
+                ],
+                "title": "BatchRequestCounts"
+              },
+              "usage": {
+                "type": "object",
+                "properties": {
+                  "input_tokens": {
+                    "type": "integer"
+                  },
+                  "input_tokens_details": {
+                    "type": "object",
+                    "properties": {
+                      "cached_tokens": {
+                        "type": "integer"
+                      }
+                    },
+                    "additionalProperties": false,
+                    "required": [
+                      "cached_tokens"
+                    ],
+                    "title": "InputTokensDetails"
+                  },
+                  "output_tokens": {
+                    "type": "integer"
+                  },
+                  "output_tokens_details": {
+                    "type": "object",
+                    "properties": {
+                      "reasoning_tokens": {
+                        "type": "integer"
+                      }
+                    },
+                    "additionalProperties": false,
+                    "required": [
+                      "reasoning_tokens"
+                    ],
+                    "title": "OutputTokensDetails"
+                  },
+                  "total_tokens": {
+                    "type": "integer"
+                  }
+                },
+                "additionalProperties": false,
+                "required": [
+                  "input_tokens",
+                  "input_tokens_details",
+                  "output_tokens",
+                  "output_tokens_details",
+                  "total_tokens"
+                ],
+                "title": "BatchUsage"
+              }
+            },
+            "additionalProperties": false,
+            "required": [
+              "id",
+              "completion_window",
+              "created_at",
+              "endpoint",
+              "input_file_id",
+              "object",
+              "status"
+            ],
+            "title": "Batch"
+          }
+        },
+        "first_id": {
+          "type": "string"
+        },
+        "last_id": {
+          "type": "string"
+        },
+        "has_more": {
+          "type": "boolean",
+          "default": false
+        }
+      },
+      "additionalProperties": false,
+      "required": [
+        "object",
+        "data",
+        "has_more"
+      ],
+      "title": "ListBatchesResponse",
+      "description": "Response containing a list of batch objects."
+    },
+    "CreateBatchRequest": {
+      "type": "object",
+      "properties": {
+        "input_file_id": {
+          "type": "string",
+          "description": "The ID of an uploaded file containing requests for the batch."
+        },
+        "endpoint": {
+          "type": "string",
+          "description": "The endpoint to be used for all requests in the batch."
+        },
+        "completion_window": {
+          "type": "string",
+          "const": "24h",
+          "description": "The time window within which the batch should be processed."
+        },
+        "metadata": {
+          "type": "object",
+          "additionalProperties": {
+            "type": "string"
+          },
+          "description": "Optional metadata for the batch."
+        },
+        "idempotency_key": {
+          "type": "string",
+          "description": "Optional idempotency key. When provided, enables idempotent behavior."
+        }
+      },
+      "additionalProperties": false,
+      "required": [
+        "input_file_id",
+        "endpoint",
+        "completion_window"
+      ],
+      "title": "CreateBatchRequest"
+    },
+    "Batch": {
+      "type": "object",
+      "properties": {
+        "id": {
+          "type": "string"
+        },
+        "completion_window": {
+          "type": "string"
+        },
+        "created_at": {
+          "type": "integer"
+        },
+        "endpoint": {
+          "type": "string"
+        },
+        "input_file_id": {
+          "type": "string"
+        },
+        "object": {
+          "type": "string",
+          "const": "batch"
+        },
+        "status": {
+          "type": "string",
+          "enum": [
+            "validating",
+            "failed",
+            "in_progress",
+            "finalizing",
+            "completed",
+            "expired",
+            "cancelling",
+            "cancelled"
+          ]
+        },
+        "cancelled_at": {
+          "type": "integer"
+        },
+        "cancelling_at": {
+          "type": "integer"
+        },
+        "completed_at": {
+          "type": "integer"
+        },
+        "error_file_id": {
+          "type": "string"
+        },
+        "errors": {
+          "type": "object",
+          "properties": {
+            "data": {
+              "type": "array",
+              "items": {
+                "type": "object",
+                "properties": {
+                  "code": {
+                    "type": "string"
+                  },
+                  "line": {
+                    "type": "integer"
+                  },
+                  "message": {
+                    "type": "string"
+                  },
+                  "param": {
+                    "type": "string"
+                  }
+                },
+                "additionalProperties": false,
+                "title": "BatchError"
+              }
+            },
+            "object": {
+              "type": "string"
+            }
+          },
+          "additionalProperties": false,
+          "title": "Errors"
+        },
+        "expired_at": {
+          "type": "integer"
+        },
+        "expires_at": {
+          "type": "integer"
+        },
+        "failed_at": {
+          "type": "integer"
+        },
+        "finalizing_at": {
+          "type": "integer"
+        },
+        "in_progress_at": {
+          "type": "integer"
+        },
+        "metadata": {
+          "type": "object",
+          "additionalProperties": {
+            "type": "string"
+          }
+        },
+        "model": {
+          "type": "string"
+        },
+        "output_file_id": {
+          "type": "string"
+        },
+        "request_counts": {
+          "type": "object",
+          "properties": {
+            "completed": {
+              "type": "integer"
+            },
+            "failed": {
+              "type": "integer"
+            },
+            "total": {
+              "type": "integer"
+            }
+          },
+          "additionalProperties": false,
+          "required": [
+            "completed",
+            "failed",
+            "total"
+          ],
+          "title": "BatchRequestCounts"
+        },
+        "usage": {
+          "type": "object",
+          "properties": {
+            "input_tokens": {
+              "type": "integer"
+            },
+            "input_tokens_details": {
+              "type": "object",
+              "properties": {
+                "cached_tokens": {
+                  "type": "integer"
+                }
+              },
+              "additionalProperties": false,
+              "required": [
+                "cached_tokens"
+              ],
+              "title": "InputTokensDetails"
+            },
+            "output_tokens": {
+              "type": "integer"
+            },
+            "output_tokens_details": {
+              "type": "object",
+              "properties": {
+                "reasoning_tokens": {
+                  "type": "integer"
+                }
+              },
+              "additionalProperties": false,
+              "required": [
+                "reasoning_tokens"
+              ],
+              "title": "OutputTokensDetails"
+            },
+            "total_tokens": {
+              "type": "integer"
+            }
+          },
+          "additionalProperties": false,
+          "required": [
+            "input_tokens",
+            "input_tokens_details",
+            "output_tokens",
+            "output_tokens_details",
+            "total_tokens"
+          ],
+          "title": "BatchUsage"
+        }
+      },
+      "additionalProperties": false,
+      "required": [
+        "id",
+        "completion_window",
+        "created_at",
+        "endpoint",
+        "input_file_id",
+        "object",
+        "status"
+      ],
+      "title": "Batch"
+    },
    "Order": {
      "type": "string",
      "enum": [
@@ -5696,16 +6328,53 @@
      },
      {
        "$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage"
+      },
+      {
+        "$ref": "#/components/schemas/OpenAIResponseInputMessageContentFile"
      }
    ],
    "discriminator": {
      "propertyName": "type",
      "mapping": {
        "input_text": "#/components/schemas/OpenAIResponseInputMessageContentText",
-        "input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage"
+        "input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage",
+        "input_file": "#/components/schemas/OpenAIResponseInputMessageContentFile"
      }
    }
  },
+  "OpenAIResponseInputMessageContentFile": {
+    "type": "object",
+    "properties": {
+      "type": {
+        "type": "string",
+        "const": "input_file",
+        "default": "input_file",
+        "description": "The type of the input item. Always `input_file`."
+      },
+      "file_data": {
+        "type": "string",
+        "description": "The data of the file to be sent to the model."
+      },
+      "file_id": {
+        "type": "string",
+        "description": "(Optional) The ID of the file to be sent to the model."
+      },
+      "file_url": {
+        "type": "string",
+        "description": "The URL of the file to be sent to the model."
+      },
+      "filename": {
+        "type": "string",
+        "description": "The name of the file to be sent to the model."
+      }
+    },
+    "additionalProperties": false,
+    "required": [
+      "type"
+    ],
+    "title": "OpenAIResponseInputMessageContentFile",
+    "description": "File content for input messages in OpenAI response format."
+  },
"OpenAIResponseInputMessageContentImage": {
|
"OpenAIResponseInputMessageContentImage": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
|
@ -5733,6 +6402,10 @@
|
||||||
"default": "input_image",
|
"default": "input_image",
|
||||||
"description": "Content type identifier, always \"input_image\""
|
"description": "Content type identifier, always \"input_image\""
|
||||||
},
|
},
|
||||||
|
"file_id": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "(Optional) The ID of the file to be sent to the model."
|
||||||
|
},
|
||||||
"image_url": {
|
"image_url": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "(Optional) URL of the image content"
|
"description": "(Optional) URL of the image content"
|
||||||
|
|
@ -7305,29 +7978,14 @@
|
||||||
"OpenAIResponseInput": {
|
"OpenAIResponseInput": {
|
||||||
"oneOf": [
|
"oneOf": [
|
||||||
{
|
{
|
||||||
"$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
|
"$ref": "#/components/schemas/OpenAIResponseOutput"
|
||||||
},
|
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall"
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"$ref": "#/components/schemas/OpenAIResponseInputFunctionToolCallOutput"
|
"$ref": "#/components/schemas/OpenAIResponseInputFunctionToolCallOutput"
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/OpenAIResponseMCPApprovalRequest"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"$ref": "#/components/schemas/OpenAIResponseMCPApprovalResponse"
|
"$ref": "#/components/schemas/OpenAIResponseMCPApprovalResponse"
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPCall"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"$ref": "#/components/schemas/OpenAIResponseMessage"
|
"$ref": "#/components/schemas/OpenAIResponseMessage"
|
||||||
}
|
}
|
||||||
|
|
@ -7536,6 +8194,10 @@
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "(Optional) ID of the previous response in a conversation"
|
"description": "(Optional) ID of the previous response in a conversation"
|
||||||
},
|
},
|
||||||
|
"prompt": {
|
||||||
|
"$ref": "#/components/schemas/OpenAIResponsePrompt",
|
||||||
|
"description": "(Optional) Reference to a prompt template and its variables."
|
||||||
|
},
|
||||||
"status": {
|
"status": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "Current status of the response generation"
|
"description": "Current status of the response generation"
|
||||||
|
|
@ -7631,6 +8293,32 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"OpenAIResponsePrompt": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"id": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Unique identifier of the prompt template"
|
||||||
|
},
|
||||||
|
"variables": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": {
|
||||||
|
"$ref": "#/components/schemas/OpenAIResponseInputMessageContent"
|
||||||
|
},
|
||||||
|
"description": "Dictionary of variable names to OpenAIResponseInputMessageContent structure for template substitution. The substitution values can either be strings, or other Response input types like images or files."
|
||||||
|
},
|
||||||
|
"version": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Version number of the prompt to use (defaults to latest if not specified)"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"id"
|
||||||
|
],
|
||||||
|
"title": "OpenAIResponsePrompt",
|
||||||
|
"description": "OpenAI compatible Prompt object that is used in OpenAI responses."
|
||||||
|
},
|
||||||
"OpenAIResponseText": {
|
"OpenAIResponseText": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
|
@ -8001,6 +8689,10 @@
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "The underlying LLM used for completions."
|
"description": "The underlying LLM used for completions."
|
||||||
},
|
},
|
||||||
|
"prompt": {
|
||||||
|
"$ref": "#/components/schemas/OpenAIResponsePrompt",
|
||||||
|
"description": "(Optional) Prompt object with ID, version, and variables."
|
||||||
|
},
|
||||||
"instructions": {
|
"instructions": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||
|
|
@ -8089,6 +8781,10 @@
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "(Optional) ID of the previous response in a conversation"
|
"description": "(Optional) ID of the previous response in a conversation"
|
||||||
},
|
},
|
||||||
|
"prompt": {
|
||||||
|
"$ref": "#/components/schemas/OpenAIResponsePrompt",
|
||||||
|
"description": "(Optional) Reference to a prompt template and its variables."
|
||||||
|
},
|
||||||
"status": {
|
"status": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "Current status of the response generation"
|
"description": "Current status of the response generation"
|
||||||
|
|
@ -11427,7 +12123,7 @@
|
||||||
},
|
},
|
||||||
"description": "List of documents to index in the RAG system"
|
"description": "List of documents to index in the RAG system"
|
||||||
},
|
},
|
||||||
"vector_db_id": {
|
"vector_store_id": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "ID of the vector database to store the document embeddings"
|
"description": "ID of the vector database to store the document embeddings"
|
||||||
},
|
},
|
||||||
|
|
@ -11439,7 +12135,7 @@
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
"required": [
|
"required": [
|
||||||
"documents",
|
"documents",
|
||||||
"vector_db_id",
|
"vector_store_id",
|
||||||
"chunk_size_in_tokens"
|
"chunk_size_in_tokens"
|
||||||
],
|
],
|
||||||
"title": "InsertRequest"
|
"title": "InsertRequest"
|
||||||
|
|
@ -11630,7 +12326,7 @@
|
||||||
"$ref": "#/components/schemas/InterleavedContent",
|
"$ref": "#/components/schemas/InterleavedContent",
|
||||||
"description": "The query content to search for in the indexed documents"
|
"description": "The query content to search for in the indexed documents"
|
||||||
},
|
},
|
||||||
"vector_db_ids": {
|
"vector_store_ids": {
|
||||||
"type": "array",
|
"type": "array",
|
||||||
"items": {
|
"items": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
|
|
@ -11645,7 +12341,7 @@
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
"required": [
|
"required": [
|
||||||
"content",
|
"content",
|
||||||
"vector_db_ids"
|
"vector_store_ids"
|
||||||
],
|
],
|
||||||
"title": "QueryRequest"
|
"title": "QueryRequest"
|
||||||
},
|
},
|
||||||
|
|
@@ -11833,6 +12529,10 @@
      "$ref": "#/components/schemas/InterleavedContent",
      "description": "The content of the chunk, which can be interleaved text, images, or other types."
    },
+    "chunk_id": {
+      "type": "string",
+      "description": "Unique identifier for the chunk. Must be provided explicitly."
+    },
    "metadata": {
      "type": "object",
      "additionalProperties": {

@@ -11866,10 +12566,6 @@
      },
      "description": "Optional embedding for the chunk. If not provided, it will be computed later."
    },
-    "stored_chunk_id": {
-      "type": "string",
-      "description": "The chunk ID that is stored in the vector database. Used for backend functionality."
-    },
    "chunk_metadata": {
      "$ref": "#/components/schemas/ChunkMetadata",
      "description": "Metadata for the chunk that will NOT be used in the context during inference. The `chunk_metadata` is required backend functionality."

@@ -11878,6 +12574,7 @@
    "additionalProperties": false,
    "required": [
      "content",
+      "chunk_id",
      "metadata"
    ],
    "title": "Chunk",
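
Note the `Chunk` change above: `chunk_id` is now a required, caller-supplied field, replacing the backend-managed `stored_chunk_id`. A minimal sketch of building chunks under the new schema (field values are placeholders):

# Sketch only: construct Chunk payloads with explicit chunk IDs.
import uuid

def make_chunk(text: str, document_id: str) -> dict:
    return {
        "chunk_id": str(uuid.uuid4()),  # required: unique ID chosen by the caller
        "content": text,                # InterleavedContent; plain text here
        "metadata": {"document_id": document_id},
    }

chunks = [make_chunk("Llama Stack unifies inference providers.", "doc-1")]
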
|
@ -11938,7 +12635,7 @@
|
||||||
"InsertChunksRequest": {
|
"InsertChunksRequest": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"vector_db_id": {
|
"vector_store_id": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "The identifier of the vector database to insert the chunks into."
|
"description": "The identifier of the vector database to insert the chunks into."
|
||||||
},
|
},
|
||||||
|
|
@ -11956,7 +12653,7 @@
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
"required": [
|
"required": [
|
||||||
"vector_db_id",
|
"vector_store_id",
|
||||||
"chunks"
|
"chunks"
|
||||||
],
|
],
|
||||||
"title": "InsertChunksRequest"
|
"title": "InsertChunksRequest"
|
||||||
|
|
@ -11964,7 +12661,7 @@
|
||||||
"QueryChunksRequest": {
|
"QueryChunksRequest": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
"vector_db_id": {
|
"vector_store_id": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "The identifier of the vector database to query."
|
"description": "The identifier of the vector database to query."
|
||||||
},
|
},
|
||||||
|
|
@ -12001,7 +12698,7 @@
|
||||||
},
|
},
|
||||||
"additionalProperties": false,
|
"additionalProperties": false,
|
||||||
"required": [
|
"required": [
|
||||||
"vector_db_id",
|
"vector_store_id",
|
||||||
"query"
|
"query"
|
||||||
],
|
],
|
||||||
"title": "QueryChunksRequest"
|
"title": "QueryChunksRequest"
|
||||||
|
|
@ -13224,6 +13921,11 @@
|
||||||
"description": "APIs for creating and interacting with agentic systems.\n\n## Responses API\n\nThe Responses API provides OpenAI-compatible functionality with enhanced capabilities for dynamic, stateful interactions.\n\n> **✅ STABLE**: This API is production-ready with backward compatibility guarantees. Recommended for production applications.\n\n### ✅ Supported Tools\n\nThe Responses API supports the following tool types:\n\n- **`web_search`**: Search the web for current information and real-time data\n- **`file_search`**: Search through uploaded files and vector stores\n - Supports dynamic `vector_store_ids` per call\n - Compatible with OpenAI file search patterns\n- **`function`**: Call custom functions with JSON schema validation\n- **`mcp_tool`**: Model Context Protocol integration\n\n### ✅ Supported Fields & Features\n\n**Core Capabilities:**\n- **Dynamic Configuration**: Switch models, vector stores, and tools per request without pre-configuration\n- **Conversation Branching**: Use `previous_response_id` to branch conversations and explore different paths\n- **Rich Annotations**: Automatic file citations, URL citations, and container file citations\n- **Status Tracking**: Monitor tool call execution status and handle failures gracefully\n\n### 🚧 Work in Progress\n\n- Full real-time response streaming support\n- `tool_choice` parameter\n- `max_tool_calls` parameter\n- Built-in tools (code interpreter, containers API)\n- Safety & guardrails\n- `reasoning` capabilities\n- `service_tier`\n- `logprobs`\n- `max_output_tokens`\n- `metadata` handling\n- `instructions`\n- `incomplete_details`\n- `background`",
|
"description": "APIs for creating and interacting with agentic systems.\n\n## Responses API\n\nThe Responses API provides OpenAI-compatible functionality with enhanced capabilities for dynamic, stateful interactions.\n\n> **✅ STABLE**: This API is production-ready with backward compatibility guarantees. Recommended for production applications.\n\n### ✅ Supported Tools\n\nThe Responses API supports the following tool types:\n\n- **`web_search`**: Search the web for current information and real-time data\n- **`file_search`**: Search through uploaded files and vector stores\n - Supports dynamic `vector_store_ids` per call\n - Compatible with OpenAI file search patterns\n- **`function`**: Call custom functions with JSON schema validation\n- **`mcp_tool`**: Model Context Protocol integration\n\n### ✅ Supported Fields & Features\n\n**Core Capabilities:**\n- **Dynamic Configuration**: Switch models, vector stores, and tools per request without pre-configuration\n- **Conversation Branching**: Use `previous_response_id` to branch conversations and explore different paths\n- **Rich Annotations**: Automatic file citations, URL citations, and container file citations\n- **Status Tracking**: Monitor tool call execution status and handle failures gracefully\n\n### 🚧 Work in Progress\n\n- Full real-time response streaming support\n- `tool_choice` parameter\n- `max_tool_calls` parameter\n- Built-in tools (code interpreter, containers API)\n- Safety & guardrails\n- `reasoning` capabilities\n- `service_tier`\n- `logprobs`\n- `max_output_tokens`\n- `metadata` handling\n- `instructions`\n- `incomplete_details`\n- `background`",
|
||||||
"x-displayName": "Agents"
|
"x-displayName": "Agents"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"name": "Batches",
|
||||||
|
"description": "The API is designed to allow use of openai client libraries for seamless integration.\n\nThis API provides the following extensions:\n - idempotent batch creation\n\nNote: This API is currently under active development and may undergo changes.",
|
||||||
|
"x-displayName": "The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale."
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"name": "Conversations",
|
"name": "Conversations",
|
||||||
"description": "Protocol for conversation management operations.",
|
"description": "Protocol for conversation management operations.",
|
||||||
|
|
@ -13297,6 +13999,7 @@
|
||||||
"name": "Operations",
|
"name": "Operations",
|
||||||
"tags": [
|
"tags": [
|
||||||
"Agents",
|
"Agents",
|
||||||
|
"Batches",
|
||||||
"Conversations",
|
"Conversations",
|
||||||
"Files",
|
"Files",
|
||||||
"Inference",
|
"Inference",
|
||||||
|
|
|
||||||
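
The hunks above consistently rename `vector_db_id`/`vector_db_ids` to `vector_store_id`/`vector_store_ids` across InsertRequest, QueryRequest, InsertChunksRequest, and QueryChunksRequest, so callers must update their keyword names. A rough sketch of the renamed RAG tool calls, assuming the llama-stack-client SDK against a server at localhost:8321 (store ID and document are placeholders):

# Sketch only: RAG tool calls after the vector_db -> vector_store rename.
from llama_stack_client import LlamaStackClient

client = LlamaStackClient(base_url="http://localhost:8321")

docs = [{"document_id": "doc-1", "content": "Llama Stack unifies inference providers.", "metadata": {}}]

client.tool_runtime.rag_tool.insert(
    documents=docs,
    vector_store_id="my-store",     # was vector_db_id before this change
    chunk_size_in_tokens=512,
)

result = client.tool_runtime.rag_tool.query(
    content="What does Llama Stack do?",
    vector_store_ids=["my-store"],  # was vector_db_ids before this change
)
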
579 docs/static/llama-stack-spec.yaml vendored

@@ -12,6 +12,141 @@ info:
servers:
  - url: http://any-hosted-llama-stack.com
paths:
+  /v1/batches:
+    get:
+      responses:
+        '200':
+          description: A list of batch objects.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListBatchesResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Batches
+      summary: List all batches for the current user.
+      description: List all batches for the current user.
+      parameters:
+        - name: after
+          in: query
+          description: >-
+            A cursor for pagination; returns batches after this batch ID.
+          required: false
+          schema:
+            type: string
+        - name: limit
+          in: query
+          description: >-
+            Number of batches to return (default 20, max 100).
+          required: true
+          schema:
+            type: integer
+      deprecated: false
+    post:
+      responses:
+        '200':
+          description: The created batch object.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Batch'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Batches
+      summary: >-
+        Create a new batch for processing multiple API requests.
+      description: >-
+        Create a new batch for processing multiple API requests.
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/CreateBatchRequest'
+        required: true
+      deprecated: false
+  /v1/batches/{batch_id}:
+    get:
+      responses:
+        '200':
+          description: The batch object.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Batch'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Batches
+      summary: >-
+        Retrieve information about a specific batch.
+      description: >-
+        Retrieve information about a specific batch.
+      parameters:
+        - name: batch_id
+          in: path
+          description: The ID of the batch to retrieve.
+          required: true
+          schema:
+            type: string
+      deprecated: false
+  /v1/batches/{batch_id}/cancel:
+    post:
+      responses:
+        '200':
+          description: The updated batch object.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Batch'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Batches
+      summary: Cancel a batch that is in progress.
+      description: Cancel a batch that is in progress.
+      parameters:
+        - name: batch_id
+          in: path
+          description: The ID of the batch to cancel.
+          required: true
+          schema:
+            type: string
+      deprecated: false
  /v1/chat/completions:
    get:
      responses:
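
With retrieve and cancel defined above, a client can poll a batch until it reaches a terminal status and cancel it past a deadline. A minimal sketch, reusing the OpenAI client from the earlier examples:

# Sketch only: poll a batch until it settles, cancelling after a timeout.
import time

TERMINAL = {"completed", "failed", "expired", "cancelled"}

def wait_for_batch(client, batch_id: str, timeout_s: float = 3600.0):
    deadline = time.monotonic() + timeout_s
    while True:
        batch = client.batches.retrieve(batch_id)   # GET /v1/batches/{batch_id}
        if batch.status in TERMINAL:
            return batch
        if time.monotonic() > deadline:
            # POST /v1/batches/{batch_id}/cancel; status moves through "cancelling"
            return client.batches.cancel(batch_id)
        time.sleep(10)
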
@ -2999,6 +3134,331 @@ components:
|
||||||
       title: Error
       description: >-
         Error response from the API. Roughly follows RFC 7807.
+    ListBatchesResponse:
+      type: object
+      properties:
+        object:
+          type: string
+          const: list
+          default: list
+        data:
+          type: array
+          items:
+            type: object
+            properties:
+              id:
+                type: string
+              completion_window:
+                type: string
+              created_at:
+                type: integer
+              endpoint:
+                type: string
+              input_file_id:
+                type: string
+              object:
+                type: string
+                const: batch
+              status:
+                type: string
+                enum:
+                  - validating
+                  - failed
+                  - in_progress
+                  - finalizing
+                  - completed
+                  - expired
+                  - cancelling
+                  - cancelled
+              cancelled_at:
+                type: integer
+              cancelling_at:
+                type: integer
+              completed_at:
+                type: integer
+              error_file_id:
+                type: string
+              errors:
+                type: object
+                properties:
+                  data:
+                    type: array
+                    items:
+                      type: object
+                      properties:
+                        code:
+                          type: string
+                        line:
+                          type: integer
+                        message:
+                          type: string
+                        param:
+                          type: string
+                      additionalProperties: false
+                      title: BatchError
+                  object:
+                    type: string
+                additionalProperties: false
+                title: Errors
+              expired_at:
+                type: integer
+              expires_at:
+                type: integer
+              failed_at:
+                type: integer
+              finalizing_at:
+                type: integer
+              in_progress_at:
+                type: integer
+              metadata:
+                type: object
+                additionalProperties:
+                  type: string
+              model:
+                type: string
+              output_file_id:
+                type: string
+              request_counts:
+                type: object
+                properties:
+                  completed:
+                    type: integer
+                  failed:
+                    type: integer
+                  total:
+                    type: integer
+                additionalProperties: false
+                required:
+                  - completed
+                  - failed
+                  - total
+                title: BatchRequestCounts
+              usage:
+                type: object
+                properties:
+                  input_tokens:
+                    type: integer
+                  input_tokens_details:
+                    type: object
+                    properties:
+                      cached_tokens:
+                        type: integer
+                    additionalProperties: false
+                    required:
+                      - cached_tokens
+                    title: InputTokensDetails
+                  output_tokens:
+                    type: integer
+                  output_tokens_details:
+                    type: object
+                    properties:
+                      reasoning_tokens:
+                        type: integer
+                    additionalProperties: false
+                    required:
+                      - reasoning_tokens
+                    title: OutputTokensDetails
+                  total_tokens:
+                    type: integer
+                additionalProperties: false
+                required:
+                  - input_tokens
+                  - input_tokens_details
+                  - output_tokens
+                  - output_tokens_details
+                  - total_tokens
+                title: BatchUsage
+            additionalProperties: false
+            required:
+              - id
+              - completion_window
+              - created_at
+              - endpoint
+              - input_file_id
+              - object
+              - status
+            title: Batch
+        first_id:
+          type: string
+        last_id:
+          type: string
+        has_more:
+          type: boolean
+          default: false
+      additionalProperties: false
+      required:
+        - object
+        - data
+        - has_more
+      title: ListBatchesResponse
+      description: >-
+        Response containing a list of batch objects.
+    CreateBatchRequest:
+      type: object
+      properties:
+        input_file_id:
+          type: string
+          description: >-
+            The ID of an uploaded file containing requests for the batch.
+        endpoint:
+          type: string
+          description: >-
+            The endpoint to be used for all requests in the batch.
+        completion_window:
+          type: string
+          const: 24h
+          description: >-
+            The time window within which the batch should be processed.
+        metadata:
+          type: object
+          additionalProperties:
+            type: string
+          description: Optional metadata for the batch.
+        idempotency_key:
+          type: string
+          description: >-
+            Optional idempotency key. When provided, enables idempotent behavior.
+      additionalProperties: false
+      required:
+        - input_file_id
+        - endpoint
+        - completion_window
+      title: CreateBatchRequest
+    Batch:
+      type: object
+      properties:
+        id:
+          type: string
+        completion_window:
+          type: string
+        created_at:
+          type: integer
+        endpoint:
+          type: string
+        input_file_id:
+          type: string
+        object:
+          type: string
+          const: batch
+        status:
+          type: string
+          enum:
+            - validating
+            - failed
+            - in_progress
+            - finalizing
+            - completed
+            - expired
+            - cancelling
+            - cancelled
+        cancelled_at:
+          type: integer
+        cancelling_at:
+          type: integer
+        completed_at:
+          type: integer
+        error_file_id:
+          type: string
+        errors:
+          type: object
+          properties:
+            data:
+              type: array
+              items:
+                type: object
+                properties:
+                  code:
+                    type: string
+                  line:
+                    type: integer
+                  message:
+                    type: string
+                  param:
+                    type: string
+                additionalProperties: false
+                title: BatchError
+            object:
+              type: string
+          additionalProperties: false
+          title: Errors
+        expired_at:
+          type: integer
+        expires_at:
+          type: integer
+        failed_at:
+          type: integer
+        finalizing_at:
+          type: integer
+        in_progress_at:
+          type: integer
+        metadata:
+          type: object
+          additionalProperties:
+            type: string
+        model:
+          type: string
+        output_file_id:
+          type: string
+        request_counts:
+          type: object
+          properties:
+            completed:
+              type: integer
+            failed:
+              type: integer
+            total:
+              type: integer
+          additionalProperties: false
+          required:
+            - completed
+            - failed
+            - total
+          title: BatchRequestCounts
+        usage:
+          type: object
+          properties:
+            input_tokens:
+              type: integer
+            input_tokens_details:
+              type: object
+              properties:
+                cached_tokens:
+                  type: integer
+              additionalProperties: false
+              required:
+                - cached_tokens
+              title: InputTokensDetails
+            output_tokens:
+              type: integer
+            output_tokens_details:
+              type: object
+              properties:
+                reasoning_tokens:
+                  type: integer
+              additionalProperties: false
+              required:
+                - reasoning_tokens
+              title: OutputTokensDetails
+            total_tokens:
+              type: integer
+          additionalProperties: false
+          required:
+            - input_tokens
+            - input_tokens_details
+            - output_tokens
+            - output_tokens_details
+            - total_tokens
+          title: BatchUsage
+      additionalProperties: false
+      required:
+        - id
+        - completion_window
+        - created_at
+        - endpoint
+        - input_file_id
+        - object
+        - status
+      title: Batch
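
CreateBatchRequest above is the request body for POST /v1/batches, and idempotency_key is the Llama Stack extension called out in the Batches tag description. A hedged sketch of preparing the input file and creating a batch idempotently follows; the JSONL line format follows the OpenAI batch convention the tag says these endpoints are compatible with, and all IDs and values are illustrative.

    import json
    from openai import OpenAI

    client = OpenAI(base_url="http://localhost:8321/v1", api_key="none")  # placeholder server

    # Each line of the input file is one request; custom_id ties results back to inputs.
    line = {
        "custom_id": "req-1",
        "method": "POST",
        "url": "/v1/chat/completions",
        "body": {"model": "llama3.2:3b", "messages": [{"role": "user", "content": "Hi"}]},
    }
    with open("batch_input.jsonl", "w") as f:
        f.write(json.dumps(line) + "\n")

    uploaded = client.files.create(file=open("batch_input.jsonl", "rb"), purpose="batch")

    # Sending the same idempotency_key twice should return the same batch rather than
    # a duplicate. It goes through extra_body because the stock client does not know
    # about this Llama Stack extension.
    batch = client.batches.create(
        input_file_id=uploaded.id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
        extra_body={"idempotency_key": "nightly-eval-2025-01-01"},
    )
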
     Order:
       type: string
       enum:

@@ -4261,11 +4721,44 @@ components:
       oneOf:
         - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
         - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage'
+        - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile'
       discriminator:
         propertyName: type
         mapping:
           input_text: '#/components/schemas/OpenAIResponseInputMessageContentText'
           input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage'
+          input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile'
+    OpenAIResponseInputMessageContentFile:
+      type: object
+      properties:
+        type:
+          type: string
+          const: input_file
+          default: input_file
+          description: >-
+            The type of the input item. Always `input_file`.
+        file_data:
+          type: string
+          description: >-
+            The data of the file to be sent to the model.
+        file_id:
+          type: string
+          description: >-
+            (Optional) The ID of the file to be sent to the model.
+        file_url:
+          type: string
+          description: >-
+            The URL of the file to be sent to the model.
+        filename:
+          type: string
+          description: >-
+            The name of the file to be sent to the model.
+      additionalProperties: false
+      required:
+        - type
+      title: OpenAIResponseInputMessageContentFile
+      description: >-
+        File content for input messages in OpenAI response format.
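
A sketch of the new input_file content part inside a Responses API request body. The schema only requires "type"; in practice exactly one of file_id, file_url, or file_data identifies the file. All values below are placeholders.

    request_body = {
        "model": "llama3.2:3b",  # hypothetical model id
        "input": [
            {
                "role": "user",
                "content": [
                    {"type": "input_text", "text": "Summarize the attached report."},
                    # file_id refers to a previously uploaded file
                    {"type": "input_file", "file_id": "file-abc123", "filename": "report.pdf"},
                ],
            }
        ],
    }
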
   OpenAIResponseInputMessageContentImage:
     type: object
     properties:

@@ -4286,6 +4779,10 @@ components:
           default: input_image
           description: >-
             Content type identifier, always "input_image"
+        file_id:
+          type: string
+          description: >-
+            (Optional) The ID of the file to be sent to the model.
         image_url:
           type: string
           description: (Optional) URL of the image content

@@ -5522,14 +6019,9 @@ components:
         Error details for failed OpenAI response requests.
     OpenAIResponseInput:
       oneOf:
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+        - $ref: '#/components/schemas/OpenAIResponseOutput'
         - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
-        - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
         - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
         - $ref: '#/components/schemas/OpenAIResponseMessage'
     OpenAIResponseInputToolFileSearch:
       type: object

@@ -5685,6 +6177,10 @@ components:
         type: string
         description: >-
           (Optional) ID of the previous response in a conversation
+      prompt:
+        $ref: '#/components/schemas/OpenAIResponsePrompt'
+        description: >-
+          (Optional) Reference to a prompt template and its variables.
       status:
         type: string
         description: >-

@@ -5758,6 +6254,30 @@ components:
         mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
         mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
         mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+    OpenAIResponsePrompt:
+      type: object
+      properties:
+        id:
+          type: string
+          description: Unique identifier of the prompt template
+        variables:
+          type: object
+          additionalProperties:
+            $ref: '#/components/schemas/OpenAIResponseInputMessageContent'
+          description: >-
+            Dictionary of variable names to OpenAIResponseInputMessageContent structure
+            for template substitution. The substitution values can either be strings,
+            or other Response input types like images or files.
+        version:
+          type: string
+          description: >-
+            Version number of the prompt to use (defaults to latest if not specified)
+      additionalProperties: false
+      required:
+        - id
+      title: OpenAIResponsePrompt
+      description: >-
+        OpenAI compatible Prompt object that is used in OpenAI responses.
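
OpenAIResponsePrompt lets a response reference a stored prompt template instead of inlining instructions. A hedged sketch of the corresponding request fragment follows; the prompt ID, version, and variable values are invented for illustration.

    request_body = {
        "model": "llama3.2:3b",
        "prompt": {
            "id": "prompt-weekly-summary",   # hypothetical stored template
            "version": "2",                  # defaults to latest when omitted
            "variables": {
                # values are OpenAIResponseInputMessageContent parts: plain text here,
                # but input_image / input_file parts are allowed too
                "customer_name": {"type": "input_text", "text": "Acme"},
            },
        },
        "input": "Generate this week's summary.",
    }
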
   OpenAIResponseText:
     type: object
     properties:

@@ -6015,6 +6535,10 @@ components:
       model:
         type: string
         description: The underlying LLM used for completions.
+      prompt:
+        $ref: '#/components/schemas/OpenAIResponsePrompt'
+        description: >-
+          (Optional) Prompt object with ID, version, and variables.
       instructions:
         type: string
       previous_response_id:

@@ -6092,6 +6616,10 @@ components:
         type: string
         description: >-
           (Optional) ID of the previous response in a conversation
+      prompt:
+        $ref: '#/components/schemas/OpenAIResponsePrompt'
+        description: >-
+          (Optional) Reference to a prompt template and its variables.
       status:
         type: string
         description: >-

@@ -8654,7 +9182,7 @@ components:
           $ref: '#/components/schemas/RAGDocument'
         description: >-
           List of documents to index in the RAG system
-      vector_db_id:
+      vector_store_id:
         type: string
         description: >-
           ID of the vector database to store the document embeddings

@@ -8665,7 +9193,7 @@ components:
     additionalProperties: false
     required:
       - documents
-      - vector_db_id
+      - vector_store_id
       - chunk_size_in_tokens
     title: InsertRequest
   DefaultRAGQueryGeneratorConfig:

@@ -8836,7 +9364,7 @@ components:
         $ref: '#/components/schemas/InterleavedContent'
         description: >-
           The query content to search for in the indexed documents
-      vector_db_ids:
+      vector_store_ids:
         type: array
         items:
           type: string

@@ -8849,7 +9377,7 @@ components:
     additionalProperties: false
     required:
       - content
-      - vector_db_ids
+      - vector_store_ids
     title: QueryRequest
   RAGQueryResult:
     type: object

@@ -8977,6 +9505,10 @@ components:
         description: >-
           The content of the chunk, which can be interleaved text, images, or other
           types.
+      chunk_id:
+        type: string
+        description: >-
+          Unique identifier for the chunk. Must be provided explicitly.
       metadata:
         type: object
         additionalProperties:

@@ -8997,10 +9529,6 @@ components:
         description: >-
           Optional embedding for the chunk. If not provided, it will be computed
           later.
-      stored_chunk_id:
-        type: string
-        description: >-
-          The chunk ID that is stored in the vector database. Used for backend functionality.
       chunk_metadata:
         $ref: '#/components/schemas/ChunkMetadata'
         description: >-

@@ -9009,6 +9537,7 @@ components:
     additionalProperties: false
     required:
       - content
+      - chunk_id
       - metadata
     title: Chunk
     description: >-

@@ -9073,7 +9602,7 @@ components:
   InsertChunksRequest:
     type: object
     properties:
-      vector_db_id:
+      vector_store_id:
         type: string
         description: >-
           The identifier of the vector database to insert the chunks into.

@@ -9092,13 +9621,13 @@ components:
         description: The time to live of the chunks.
     additionalProperties: false
     required:
-      - vector_db_id
+      - vector_store_id
       - chunks
     title: InsertChunksRequest
   QueryChunksRequest:
     type: object
     properties:
-      vector_db_id:
+      vector_store_id:
         type: string
         description: >-
           The identifier of the vector database to query.

@@ -9118,7 +9647,7 @@ components:
       description: The parameters of the query.
     additionalProperties: false
     required:
-      - vector_db_id
+      - vector_store_id
       - query
     title: QueryChunksRequest
   QueryChunksResponse:
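
Two client-visible consequences of the hunks above: request bodies now say vector_store_id rather than vector_db_id, and each Chunk must carry an explicit chunk_id (the server-side stored_chunk_id field is gone). A hedged sketch of an insert call as a raw HTTP request; the route prefix and all values are placeholders and may differ by deployment.

    import requests

    payload = {
        "vector_store_id": "vs_123",            # formerly "vector_db_id"
        "chunks": [
            {
                "content": "Llama Stack supports pluggable vector stores.",
                "chunk_id": "doc1-chunk-0000",  # now required; must be provided explicitly
                "metadata": {"document_id": "doc1"},
            }
        ],
    }
    requests.post("http://localhost:8321/v1/vector-io/insert", json=payload)
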

@@ -10075,6 +10604,19 @@ tags:

     - `background`
   x-displayName: Agents
+- name: Batches
+  description: >-
+    The API is designed to allow use of openai client libraries for seamless
+    integration.
+
+    This API provides the following extensions:
+     - idempotent batch creation
+
+    Note: This API is currently under active development and may undergo changes.
+  x-displayName: >-
+    The Batches API enables efficient processing of multiple requests in a single
+    operation, particularly useful for processing large datasets, batch evaluation
+    workflows, and cost-effective inference at scale.
 - name: Conversations
   description: >-
     Protocol for conversation management operations.

@@ -10137,6 +10679,7 @@ x-tagGroups:
 - name: Operations
   tags:
     - Agents
+    - Batches
     - Conversations
     - Files
     - Inference

766 docs/static/stainless-llama-stack-spec.html vendored

@@ -40,6 +40,193 @@
     }
   ],
   "paths": {
+    "/v1/batches": {
+      "get": {
+        "responses": {
+          "200": {
+            "description": "A list of batch objects.",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/ListBatchesResponse"
+                }
+              }
+            }
+          },
+          "400": {
+            "$ref": "#/components/responses/BadRequest400"
+          },
+          "429": {
+            "$ref": "#/components/responses/TooManyRequests429"
+          },
+          "500": {
+            "$ref": "#/components/responses/InternalServerError500"
+          },
+          "default": {
+            "$ref": "#/components/responses/DefaultError"
+          }
+        },
+        "tags": [
+          "Batches"
+        ],
+        "summary": "List all batches for the current user.",
+        "description": "List all batches for the current user.",
+        "parameters": [
+          {
+            "name": "after",
+            "in": "query",
+            "description": "A cursor for pagination; returns batches after this batch ID.",
+            "required": false,
+            "schema": {
+              "type": "string"
+            }
+          },
+          {
+            "name": "limit",
+            "in": "query",
+            "description": "Number of batches to return (default 20, max 100).",
+            "required": true,
+            "schema": {
+              "type": "integer"
+            }
+          }
+        ],
+        "deprecated": false
+      },
+      "post": {
+        "responses": {
+          "200": {
+            "description": "The created batch object.",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/Batch"
+                }
+              }
+            }
+          },
+          "400": {
+            "$ref": "#/components/responses/BadRequest400"
+          },
+          "429": {
+            "$ref": "#/components/responses/TooManyRequests429"
+          },
+          "500": {
+            "$ref": "#/components/responses/InternalServerError500"
+          },
+          "default": {
+            "$ref": "#/components/responses/DefaultError"
+          }
+        },
+        "tags": [
+          "Batches"
+        ],
+        "summary": "Create a new batch for processing multiple API requests.",
+        "description": "Create a new batch for processing multiple API requests.",
+        "parameters": [],
+        "requestBody": {
+          "content": {
+            "application/json": {
+              "schema": {
+                "$ref": "#/components/schemas/CreateBatchRequest"
+              }
+            }
+          },
+          "required": true
+        },
+        "deprecated": false
+      }
+    },
+    "/v1/batches/{batch_id}": {
+      "get": {
+        "responses": {
+          "200": {
+            "description": "The batch object.",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/Batch"
+                }
+              }
+            }
+          },
+          "400": {
+            "$ref": "#/components/responses/BadRequest400"
+          },
+          "429": {
+            "$ref": "#/components/responses/TooManyRequests429"
+          },
+          "500": {
+            "$ref": "#/components/responses/InternalServerError500"
+          },
+          "default": {
+            "$ref": "#/components/responses/DefaultError"
+          }
+        },
+        "tags": [
+          "Batches"
+        ],
+        "summary": "Retrieve information about a specific batch.",
+        "description": "Retrieve information about a specific batch.",
+        "parameters": [
+          {
+            "name": "batch_id",
+            "in": "path",
+            "description": "The ID of the batch to retrieve.",
+            "required": true,
+            "schema": {
+              "type": "string"
+            }
+          }
+        ],
+        "deprecated": false
+      }
+    },
+    "/v1/batches/{batch_id}/cancel": {
+      "post": {
+        "responses": {
+          "200": {
+            "description": "The updated batch object.",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "$ref": "#/components/schemas/Batch"
+                }
+              }
+            }
+          },
+          "400": {
+            "$ref": "#/components/responses/BadRequest400"
+          },
+          "429": {
+            "$ref": "#/components/responses/TooManyRequests429"
+          },
+          "500": {
+            "$ref": "#/components/responses/InternalServerError500"
+          },
+          "default": {
+            "$ref": "#/components/responses/DefaultError"
+          }
+        },
+        "tags": [
+          "Batches"
+        ],
+        "summary": "Cancel a batch that is in progress.",
+        "description": "Cancel a batch that is in progress.",
+        "parameters": [
+          {
+            "name": "batch_id",
+            "in": "path",
+            "description": "The ID of the batch to cancel.",
+            "required": true,
+            "schema": {
+              "type": "string"
+            }
+          }
+        ],
+        "deprecated": false
+      }
+    },
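
The list operation above is cursor-paginated via the after/limit query parameters, backed by has_more and last_id in ListBatchesResponse. With the openai client from the earlier sketches, the SDK's paginator walks the cursor transparently; a minimal, hedged sketch:

    # Iterating the paginator follows the "after" cursor until has_more is false;
    # limit caps each page (default 20, max 100 per the spec above).
    for batch in client.batches.list(limit=100):
        print(batch.id, batch.status)
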
"/v1/chat/completions": {
|
"/v1/chat/completions": {
|
||||||
"get": {
|
"get": {
|
||||||
"responses": {
|
"responses": {
|
||||||
|
|
@ -5677,6 +5864,451 @@
|
||||||
"title": "Error",
|
"title": "Error",
|
||||||
"description": "Error response from the API. Roughly follows RFC 7807."
|
"description": "Error response from the API. Roughly follows RFC 7807."
|
||||||
},
|
},
|
||||||
|
"ListBatchesResponse": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"object": {
|
||||||
|
"type": "string",
|
||||||
|
"const": "list",
|
||||||
|
"default": "list"
|
||||||
|
},
|
||||||
|
"data": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"id": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"completion_window": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"created_at": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"endpoint": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"input_file_id": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"object": {
|
||||||
|
"type": "string",
|
||||||
|
"const": "batch"
|
||||||
|
},
|
||||||
|
"status": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": [
|
||||||
|
"validating",
|
||||||
|
"failed",
|
||||||
|
"in_progress",
|
||||||
|
"finalizing",
|
||||||
|
"completed",
|
||||||
|
"expired",
|
||||||
|
"cancelling",
|
||||||
|
"cancelled"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"cancelled_at": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"cancelling_at": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"completed_at": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"error_file_id": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"errors": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"data": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"code": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"line": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"message": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"param": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"title": "BatchError"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"object": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"title": "Errors"
|
||||||
|
},
|
||||||
|
"expired_at": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"expires_at": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"failed_at": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"finalizing_at": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"in_progress_at": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"metadata": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"model": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"output_file_id": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"request_counts": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"completed": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"failed": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"total": {
|
||||||
|
"type": "integer"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"completed",
|
||||||
|
"failed",
|
||||||
|
"total"
|
||||||
|
],
|
||||||
|
"title": "BatchRequestCounts"
|
||||||
|
},
|
||||||
|
"usage": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"input_tokens": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"input_tokens_details": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"cached_tokens": {
|
||||||
|
"type": "integer"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"cached_tokens"
|
||||||
|
],
|
||||||
|
"title": "InputTokensDetails"
|
||||||
|
},
|
||||||
|
"output_tokens": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"output_tokens_details": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"reasoning_tokens": {
|
||||||
|
"type": "integer"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"reasoning_tokens"
|
||||||
|
],
|
||||||
|
"title": "OutputTokensDetails"
|
||||||
|
},
|
||||||
|
"total_tokens": {
|
||||||
|
"type": "integer"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"input_tokens",
|
||||||
|
"input_tokens_details",
|
||||||
|
"output_tokens",
|
||||||
|
"output_tokens_details",
|
||||||
|
"total_tokens"
|
||||||
|
],
|
||||||
|
"title": "BatchUsage"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"id",
|
||||||
|
"completion_window",
|
||||||
|
"created_at",
|
||||||
|
"endpoint",
|
||||||
|
"input_file_id",
|
||||||
|
"object",
|
||||||
|
"status"
|
||||||
|
],
|
||||||
|
"title": "Batch"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"first_id": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"last_id": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"has_more": {
|
||||||
|
"type": "boolean",
|
||||||
|
"default": false
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"object",
|
||||||
|
"data",
|
||||||
|
"has_more"
|
||||||
|
],
|
||||||
|
"title": "ListBatchesResponse",
|
||||||
|
"description": "Response containing a list of batch objects."
|
||||||
|
},
|
||||||
|
"CreateBatchRequest": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"input_file_id": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "The ID of an uploaded file containing requests for the batch."
|
||||||
|
},
|
||||||
|
"endpoint": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "The endpoint to be used for all requests in the batch."
|
||||||
|
},
|
||||||
|
"completion_window": {
|
||||||
|
"type": "string",
|
||||||
|
"const": "24h",
|
||||||
|
"description": "The time window within which the batch should be processed."
|
||||||
|
},
|
||||||
|
"metadata": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"description": "Optional metadata for the batch."
|
||||||
|
},
|
||||||
|
"idempotency_key": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Optional idempotency key. When provided, enables idempotent behavior."
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"input_file_id",
|
||||||
|
"endpoint",
|
||||||
|
"completion_window"
|
||||||
|
],
|
||||||
|
"title": "CreateBatchRequest"
|
||||||
|
},
|
||||||
|
"Batch": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"id": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"completion_window": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"created_at": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"endpoint": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"input_file_id": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"object": {
|
||||||
|
"type": "string",
|
||||||
|
"const": "batch"
|
||||||
|
},
|
||||||
|
"status": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": [
|
||||||
|
"validating",
|
||||||
|
"failed",
|
||||||
|
"in_progress",
|
||||||
|
"finalizing",
|
||||||
|
"completed",
|
||||||
|
"expired",
|
||||||
|
"cancelling",
|
||||||
|
"cancelled"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"cancelled_at": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"cancelling_at": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"completed_at": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"error_file_id": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"errors": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"data": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"code": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"line": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"message": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"param": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"title": "BatchError"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"object": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"title": "Errors"
|
||||||
|
},
|
||||||
|
"expired_at": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"expires_at": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"failed_at": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"finalizing_at": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"in_progress_at": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"metadata": {
|
||||||
|
"type": "object",
|
||||||
|
"additionalProperties": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"model": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"output_file_id": {
|
||||||
|
"type": "string"
|
||||||
|
},
|
||||||
|
"request_counts": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"completed": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"failed": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"total": {
|
||||||
|
"type": "integer"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"completed",
|
||||||
|
"failed",
|
||||||
|
"total"
|
||||||
|
],
|
||||||
|
"title": "BatchRequestCounts"
|
||||||
|
},
|
||||||
|
"usage": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"input_tokens": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"input_tokens_details": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"cached_tokens": {
|
||||||
|
"type": "integer"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"cached_tokens"
|
||||||
|
],
|
||||||
|
"title": "InputTokensDetails"
|
||||||
|
},
|
||||||
|
"output_tokens": {
|
||||||
|
"type": "integer"
|
||||||
|
},
|
||||||
|
"output_tokens_details": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"reasoning_tokens": {
|
||||||
|
"type": "integer"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"reasoning_tokens"
|
||||||
|
],
|
||||||
|
"title": "OutputTokensDetails"
|
||||||
|
},
|
||||||
|
"total_tokens": {
|
||||||
|
"type": "integer"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"input_tokens",
|
||||||
|
"input_tokens_details",
|
||||||
|
"output_tokens",
|
||||||
|
"output_tokens_details",
|
||||||
|
"total_tokens"
|
||||||
|
],
|
||||||
|
"title": "BatchUsage"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"additionalProperties": false,
|
||||||
|
"required": [
|
||||||
|
"id",
|
||||||
|
"completion_window",
|
||||||
|
"created_at",
|
||||||
|
"endpoint",
|
||||||
|
"input_file_id",
|
||||||
|
"object",
|
||||||
|
"status"
|
||||||
|
],
|
||||||
|
"title": "Batch"
|
||||||
|
},
|
||||||
"Order": {
|
"Order": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"enum": [
|
"enum": [
|
||||||
|
|
@ -7368,16 +8000,53 @@
|
||||||
           },
           {
             "$ref": "#/components/schemas/OpenAIResponseInputMessageContentImage"
+          },
+          {
+            "$ref": "#/components/schemas/OpenAIResponseInputMessageContentFile"
           }
         ],
         "discriminator": {
           "propertyName": "type",
           "mapping": {
             "input_text": "#/components/schemas/OpenAIResponseInputMessageContentText",
-            "input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage"
+            "input_image": "#/components/schemas/OpenAIResponseInputMessageContentImage",
+            "input_file": "#/components/schemas/OpenAIResponseInputMessageContentFile"
           }
         }
       },
+      "OpenAIResponseInputMessageContentFile": {
+        "type": "object",
+        "properties": {
+          "type": {
+            "type": "string",
+            "const": "input_file",
+            "default": "input_file",
+            "description": "The type of the input item. Always `input_file`."
+          },
+          "file_data": {
+            "type": "string",
+            "description": "The data of the file to be sent to the model."
+          },
+          "file_id": {
+            "type": "string",
+            "description": "(Optional) The ID of the file to be sent to the model."
+          },
+          "file_url": {
+            "type": "string",
+            "description": "The URL of the file to be sent to the model."
+          },
+          "filename": {
+            "type": "string",
+            "description": "The name of the file to be sent to the model."
+          }
+        },
+        "additionalProperties": false,
+        "required": [
+          "type"
+        ],
+        "title": "OpenAIResponseInputMessageContentFile",
+        "description": "File content for input messages in OpenAI response format."
+      },
"OpenAIResponseInputMessageContentImage": {
|
"OpenAIResponseInputMessageContentImage": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
|
@ -7405,6 +8074,10 @@
|
||||||
"default": "input_image",
|
"default": "input_image",
|
||||||
"description": "Content type identifier, always \"input_image\""
|
"description": "Content type identifier, always \"input_image\""
|
||||||
},
|
},
|
||||||
|
"file_id": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "(Optional) The ID of the file to be sent to the model."
|
||||||
|
},
|
||||||
"image_url": {
|
"image_url": {
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "(Optional) URL of the image content"
|
"description": "(Optional) URL of the image content"
|
||||||

@@ -8977,29 +9650,14 @@
       "OpenAIResponseInput": {
         "oneOf": [
           {
-            "$ref": "#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall"
-          },
-          {
-            "$ref": "#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall"
-          },
-          {
-            "$ref": "#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall"
+            "$ref": "#/components/schemas/OpenAIResponseOutput"
           },
           {
             "$ref": "#/components/schemas/OpenAIResponseInputFunctionToolCallOutput"
           },
           {
-            "$ref": "#/components/schemas/OpenAIResponseMCPApprovalRequest"
-          },
-          {
             "$ref": "#/components/schemas/OpenAIResponseMCPApprovalResponse"
           },
           {
-            "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPCall"
-          },
-          {
-            "$ref": "#/components/schemas/OpenAIResponseOutputMessageMCPListTools"
-          },
-          {
             "$ref": "#/components/schemas/OpenAIResponseMessage"
           }

@@ -9208,6 +9866,10 @@
           "type": "string",
           "description": "(Optional) ID of the previous response in a conversation"
         },
+        "prompt": {
+          "$ref": "#/components/schemas/OpenAIResponsePrompt",
+          "description": "(Optional) Reference to a prompt template and its variables."
+        },
         "status": {
           "type": "string",
           "description": "Current status of the response generation"

@@ -9303,6 +9965,32 @@
           }
         }
       },
+      "OpenAIResponsePrompt": {
+        "type": "object",
+        "properties": {
+          "id": {
+            "type": "string",
+            "description": "Unique identifier of the prompt template"
+          },
+          "variables": {
+            "type": "object",
+            "additionalProperties": {
+              "$ref": "#/components/schemas/OpenAIResponseInputMessageContent"
+            },
+            "description": "Dictionary of variable names to OpenAIResponseInputMessageContent structure for template substitution. The substitution values can either be strings, or other Response input types like images or files."
+          },
+          "version": {
+            "type": "string",
+            "description": "Version number of the prompt to use (defaults to latest if not specified)"
+          }
+        },
+        "additionalProperties": false,
+        "required": [
+          "id"
+        ],
+        "title": "OpenAIResponsePrompt",
+        "description": "OpenAI compatible Prompt object that is used in OpenAI responses."
+      },
"OpenAIResponseText": {
|
"OpenAIResponseText": {
|
||||||
"type": "object",
|
"type": "object",
|
||||||
"properties": {
|
"properties": {
|
||||||
|
|
@ -9673,6 +10361,10 @@
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"description": "The underlying LLM used for completions."
|
"description": "The underlying LLM used for completions."
|
||||||
},
|
},
|
||||||
|
"prompt": {
|
||||||
|
"$ref": "#/components/schemas/OpenAIResponsePrompt",
|
||||||
|
"description": "(Optional) Prompt object with ID, version, and variables."
|
||||||
|
},
|
||||||
"instructions": {
|
"instructions": {
|
||||||
"type": "string"
|
"type": "string"
|
||||||
},
|
},
|
||||||

@@ -9761,6 +10453,10 @@
          "type": "string",
          "description": "(Optional) ID of the previous response in a conversation"
        },
+        "prompt": {
+          "$ref": "#/components/schemas/OpenAIResponsePrompt",
+          "description": "(Optional) Reference to a prompt template and its variables."
+        },
        "status": {
          "type": "string",
          "description": "Current status of the response generation"

@@ -13099,7 +13795,7 @@
           },
           "description": "List of documents to index in the RAG system"
         },
-        "vector_db_id": {
+        "vector_store_id": {
           "type": "string",
           "description": "ID of the vector database to store the document embeddings"
         },

@@ -13111,7 +13807,7 @@
       "additionalProperties": false,
       "required": [
         "documents",
-        "vector_db_id",
+        "vector_store_id",
         "chunk_size_in_tokens"
       ],
       "title": "InsertRequest"

@@ -13302,7 +13998,7 @@
         "$ref": "#/components/schemas/InterleavedContent",
         "description": "The query content to search for in the indexed documents"
       },
-      "vector_db_ids": {
+      "vector_store_ids": {
        "type": "array",
        "items": {
          "type": "string"

@@ -13317,7 +14013,7 @@
       "additionalProperties": false,
       "required": [
         "content",
-        "vector_db_ids"
+        "vector_store_ids"
       ],
       "title": "QueryRequest"
     },

@@ -13505,6 +14201,10 @@
           "$ref": "#/components/schemas/InterleavedContent",
           "description": "The content of the chunk, which can be interleaved text, images, or other types."
         },
+        "chunk_id": {
+          "type": "string",
+          "description": "Unique identifier for the chunk. Must be provided explicitly."
+        },
         "metadata": {
           "type": "object",
           "additionalProperties": {

@@ -13538,10 +14238,6 @@
           },
           "description": "Optional embedding for the chunk. If not provided, it will be computed later."
         },
-        "stored_chunk_id": {
-          "type": "string",
-          "description": "The chunk ID that is stored in the vector database. Used for backend functionality."
-        },
         "chunk_metadata": {
           "$ref": "#/components/schemas/ChunkMetadata",
           "description": "Metadata for the chunk that will NOT be used in the context during inference. The `chunk_metadata` is required backend functionality."

@@ -13550,6 +14246,7 @@
       "additionalProperties": false,
       "required": [
         "content",
+        "chunk_id",
         "metadata"
       ],
       "title": "Chunk",

@@ -13610,7 +14307,7 @@
     "InsertChunksRequest": {
       "type": "object",
       "properties": {
-        "vector_db_id": {
+        "vector_store_id": {
           "type": "string",
           "description": "The identifier of the vector database to insert the chunks into."
         },

@@ -13628,7 +14325,7 @@
       },
       "additionalProperties": false,
       "required": [
-        "vector_db_id",
+        "vector_store_id",
        "chunks"
       ],
       "title": "InsertChunksRequest"

@@ -13636,7 +14333,7 @@
     "QueryChunksRequest": {
       "type": "object",
      "properties": {
-        "vector_db_id": {
+        "vector_store_id": {
          "type": "string",
          "description": "The identifier of the vector database to query."
        },

@@ -13673,7 +14370,7 @@
       },
       "additionalProperties": false,
       "required": [
-        "vector_db_id",
+        "vector_store_id",
         "query"
       ],
       "title": "QueryChunksRequest"

@@ -15452,7 +16149,6 @@
         },
         "max_tokens": {
           "type": "integer",
-          "default": 0,
           "description": "The maximum number of tokens that can be generated in the completion. The token count of your prompt plus max_tokens cannot exceed the model's context length."
         },
         "repetition_penalty": {

@@ -15735,7 +16431,7 @@
           "const": "memory_retrieval",
           "default": "memory_retrieval"
         },
-        "vector_db_ids": {
+        "vector_store_ids": {
           "type": "string",
           "description": "The IDs of the vector databases to retrieve context from."
         },

@@ -15749,7 +16445,7 @@
         "turn_id",
         "step_id",
         "step_type",
-        "vector_db_ids",
+        "vector_store_ids",
         "inserted_context"
       ],
       "title": "MemoryRetrievalStep",
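
The same rename applies on the query path: QueryChunksRequest now takes vector_store_id (and QueryRequest takes vector_store_ids). A hedged raw-HTTP sketch; the route prefix and IDs are placeholders and may differ by deployment.

    import requests

    resp = requests.post(
        "http://localhost:8321/v1/vector-io/query",
        json={
            "vector_store_id": "vs_123",   # formerly "vector_db_id"
            "query": "pluggable vector stores",
        },
    )
    print(resp.json())
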

@@ -17897,6 +18593,11 @@
       "description": "APIs for creating and interacting with agentic systems.",
       "x-displayName": "Agents"
     },
+    {
+      "name": "Batches",
+      "description": "The API is designed to allow use of openai client libraries for seamless integration.\n\nThis API provides the following extensions:\n - idempotent batch creation\n\nNote: This API is currently under active development and may undergo changes.",
+      "x-displayName": "The Batches API enables efficient processing of multiple requests in a single operation, particularly useful for processing large datasets, batch evaluation workflows, and cost-effective inference at scale."
+    },
     {
       "name": "Benchmarks",
       "description": ""

@@ -17991,6 +18692,7 @@
       "name": "Operations",
       "tags": [
         "Agents",
+        "Batches",
         "Benchmarks",
         "Conversations",
         "DatasetIO",

584 docs/static/stainless-llama-stack-spec.yaml vendored

@@ -15,6 +15,141 @@ info:
 servers:
   - url: http://any-hosted-llama-stack.com
 paths:
+  /v1/batches:
+    get:
+      responses:
+        '200':
+          description: A list of batch objects.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/ListBatchesResponse'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Batches
+      summary: List all batches for the current user.
+      description: List all batches for the current user.
+      parameters:
+        - name: after
+          in: query
+          description: >-
+            A cursor for pagination; returns batches after this batch ID.
+          required: false
+          schema:
+            type: string
+        - name: limit
+          in: query
+          description: >-
+            Number of batches to return (default 20, max 100).
+          required: true
+          schema:
+            type: integer
+      deprecated: false
+    post:
+      responses:
+        '200':
+          description: The created batch object.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Batch'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Batches
+      summary: >-
+        Create a new batch for processing multiple API requests.
+      description: >-
+        Create a new batch for processing multiple API requests.
+      parameters: []
+      requestBody:
+        content:
+          application/json:
+            schema:
+              $ref: '#/components/schemas/CreateBatchRequest'
+        required: true
+      deprecated: false
+  /v1/batches/{batch_id}:
+    get:
+      responses:
+        '200':
+          description: The batch object.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Batch'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Batches
+      summary: >-
+        Retrieve information about a specific batch.
+      description: >-
+        Retrieve information about a specific batch.
+      parameters:
+        - name: batch_id
+          in: path
+          description: The ID of the batch to retrieve.
+          required: true
+          schema:
+            type: string
+      deprecated: false
+  /v1/batches/{batch_id}/cancel:
+    post:
+      responses:
+        '200':
+          description: The updated batch object.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Batch'
+        '400':
+          $ref: '#/components/responses/BadRequest400'
+        '429':
+          $ref: >-
+            #/components/responses/TooManyRequests429
+        '500':
+          $ref: >-
+            #/components/responses/InternalServerError500
+        default:
+          $ref: '#/components/responses/DefaultError'
+      tags:
+        - Batches
+      summary: Cancel a batch that is in progress.
+      description: Cancel a batch that is in progress.
+      parameters:
+        - name: batch_id
+          in: path
+          description: The ID of the batch to cancel.
+          required: true
+          schema:
+            type: string
+      deprecated: false
   /v1/chat/completions:
     get:
       responses:

@@ -4212,6 +4347,331 @@ components:
title: Error
|
title: Error
|
||||||
description: >-
|
description: >-
|
||||||
Error response from the API. Roughly follows RFC 7807.
|
Error response from the API. Roughly follows RFC 7807.
|
||||||
|
ListBatchesResponse:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
object:
|
||||||
|
type: string
|
||||||
|
const: list
|
||||||
|
default: list
|
||||||
|
data:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
id:
|
||||||
|
type: string
|
||||||
|
completion_window:
|
||||||
|
type: string
|
||||||
|
created_at:
|
||||||
|
type: integer
|
||||||
|
endpoint:
|
||||||
|
type: string
|
||||||
|
input_file_id:
|
||||||
|
type: string
|
||||||
|
object:
|
||||||
|
type: string
|
||||||
|
const: batch
|
||||||
|
status:
|
||||||
|
type: string
|
||||||
|
enum:
|
||||||
|
- validating
|
||||||
|
- failed
|
||||||
|
- in_progress
|
||||||
|
- finalizing
|
||||||
|
- completed
|
||||||
|
- expired
|
||||||
|
- cancelling
|
||||||
|
- cancelled
|
||||||
|
cancelled_at:
|
||||||
|
type: integer
|
||||||
|
cancelling_at:
|
||||||
|
type: integer
|
||||||
|
completed_at:
|
||||||
|
type: integer
|
||||||
|
error_file_id:
|
||||||
|
type: string
|
||||||
|
errors:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
data:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
code:
|
||||||
|
type: string
|
||||||
|
line:
|
||||||
|
type: integer
|
||||||
|
message:
|
||||||
|
type: string
|
||||||
|
param:
|
||||||
|
type: string
|
||||||
|
additionalProperties: false
|
||||||
|
title: BatchError
|
||||||
|
object:
|
||||||
|
type: string
|
||||||
|
additionalProperties: false
|
||||||
|
title: Errors
|
||||||
|
expired_at:
|
||||||
|
type: integer
|
||||||
|
expires_at:
|
||||||
|
type: integer
|
||||||
|
failed_at:
|
||||||
|
type: integer
|
||||||
|
finalizing_at:
|
||||||
|
type: integer
|
||||||
|
in_progress_at:
|
||||||
|
type: integer
|
||||||
|
metadata:
|
||||||
|
type: object
|
||||||
|
additionalProperties:
|
||||||
|
type: string
|
||||||
|
model:
|
||||||
|
type: string
|
||||||
|
output_file_id:
|
||||||
|
type: string
|
||||||
|
request_counts:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
completed:
|
||||||
|
type: integer
|
||||||
|
failed:
|
||||||
|
type: integer
|
||||||
|
total:
|
||||||
|
type: integer
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- completed
|
||||||
|
- failed
|
||||||
|
- total
|
||||||
|
title: BatchRequestCounts
|
||||||
|
usage:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
input_tokens:
|
||||||
|
type: integer
|
||||||
|
input_tokens_details:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
cached_tokens:
|
||||||
|
type: integer
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- cached_tokens
|
||||||
|
title: InputTokensDetails
|
||||||
|
output_tokens:
|
||||||
|
type: integer
|
||||||
|
output_tokens_details:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
reasoning_tokens:
|
||||||
|
type: integer
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- reasoning_tokens
|
||||||
|
title: OutputTokensDetails
|
||||||
|
total_tokens:
|
||||||
|
type: integer
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- input_tokens
|
||||||
|
- input_tokens_details
|
||||||
|
- output_tokens
|
||||||
|
- output_tokens_details
|
||||||
|
- total_tokens
|
||||||
|
title: BatchUsage
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- id
|
||||||
|
- completion_window
|
||||||
|
- created_at
|
||||||
|
- endpoint
|
||||||
|
- input_file_id
|
||||||
|
- object
|
||||||
|
- status
|
||||||
|
title: Batch
|
||||||
|
first_id:
|
||||||
|
type: string
|
||||||
|
last_id:
|
||||||
|
type: string
|
||||||
|
has_more:
|
||||||
|
type: boolean
|
||||||
|
default: false
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- object
|
||||||
|
- data
|
||||||
|
- has_more
|
||||||
|
title: ListBatchesResponse
|
||||||
|
description: >-
|
||||||
|
Response containing a list of batch objects.
|
||||||
|
CreateBatchRequest:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
input_file_id:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
The ID of an uploaded file containing requests for the batch.
|
||||||
|
endpoint:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
The endpoint to be used for all requests in the batch.
|
||||||
|
completion_window:
|
||||||
|
type: string
|
||||||
|
const: 24h
|
||||||
|
description: >-
|
||||||
|
The time window within which the batch should be processed.
|
||||||
|
metadata:
|
||||||
|
type: object
|
||||||
|
additionalProperties:
|
||||||
|
type: string
|
||||||
|
description: Optional metadata for the batch.
|
||||||
|
idempotency_key:
|
||||||
|
type: string
|
||||||
|
description: >-
|
||||||
|
Optional idempotency key. When provided, enables idempotent behavior.
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- input_file_id
|
||||||
|
- endpoint
|
||||||
|
- completion_window
|
||||||
|
title: CreateBatchRequest
|
||||||
|
Batch:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
id:
|
||||||
|
type: string
|
||||||
|
completion_window:
|
||||||
|
type: string
|
||||||
|
created_at:
|
||||||
|
type: integer
|
||||||
|
endpoint:
|
||||||
|
type: string
|
||||||
|
input_file_id:
|
||||||
|
type: string
|
||||||
|
object:
|
||||||
|
type: string
|
||||||
|
const: batch
|
||||||
|
status:
|
||||||
|
type: string
|
||||||
|
enum:
|
||||||
|
- validating
|
||||||
|
- failed
|
||||||
|
- in_progress
|
||||||
|
- finalizing
|
||||||
|
- completed
|
||||||
|
- expired
|
||||||
|
- cancelling
|
||||||
|
- cancelled
|
||||||
|
cancelled_at:
|
||||||
|
type: integer
|
||||||
|
cancelling_at:
|
||||||
|
type: integer
|
||||||
|
completed_at:
|
||||||
|
type: integer
|
||||||
|
error_file_id:
|
||||||
|
type: string
|
||||||
|
errors:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
data:
|
||||||
|
type: array
|
||||||
|
items:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
code:
|
||||||
|
type: string
|
||||||
|
line:
|
||||||
|
type: integer
|
||||||
|
message:
|
||||||
|
type: string
|
||||||
|
param:
|
||||||
|
type: string
|
||||||
|
additionalProperties: false
|
||||||
|
title: BatchError
|
||||||
|
object:
|
||||||
|
type: string
|
||||||
|
additionalProperties: false
|
||||||
|
title: Errors
|
||||||
|
expired_at:
|
||||||
|
type: integer
|
||||||
|
expires_at:
|
||||||
|
type: integer
|
||||||
|
failed_at:
|
||||||
|
type: integer
|
||||||
|
finalizing_at:
|
||||||
|
type: integer
|
||||||
|
in_progress_at:
|
||||||
|
type: integer
|
||||||
|
metadata:
|
||||||
|
type: object
|
||||||
|
additionalProperties:
|
||||||
|
type: string
|
||||||
|
model:
|
||||||
|
type: string
|
||||||
|
output_file_id:
|
||||||
|
type: string
|
||||||
|
request_counts:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
completed:
|
||||||
|
type: integer
|
||||||
|
failed:
|
||||||
|
type: integer
|
||||||
|
total:
|
||||||
|
type: integer
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- completed
|
||||||
|
- failed
|
||||||
|
- total
|
||||||
|
title: BatchRequestCounts
|
||||||
|
usage:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
input_tokens:
|
||||||
|
type: integer
|
||||||
|
input_tokens_details:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
cached_tokens:
|
||||||
|
type: integer
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- cached_tokens
|
||||||
|
title: InputTokensDetails
|
||||||
|
output_tokens:
|
||||||
|
type: integer
|
||||||
|
output_tokens_details:
|
||||||
|
type: object
|
||||||
|
properties:
|
||||||
|
reasoning_tokens:
|
||||||
|
type: integer
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- reasoning_tokens
|
||||||
|
title: OutputTokensDetails
|
||||||
|
total_tokens:
|
||||||
|
type: integer
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- input_tokens
|
||||||
|
- input_tokens_details
|
||||||
|
- output_tokens
|
||||||
|
- output_tokens_details
|
||||||
|
- total_tokens
|
||||||
|
title: BatchUsage
|
||||||
|
additionalProperties: false
|
||||||
|
required:
|
||||||
|
- id
|
||||||
|
- completion_window
|
||||||
|
- created_at
|
||||||
|
- endpoint
|
||||||
|
- input_file_id
|
||||||
|
- object
|
||||||
|
- status
|
||||||
|
title: Batch
|
||||||
Order:
|
Order:
|
||||||
type: string
|
type: string
|
||||||
enum:
|
enum:
|
||||||
|
|
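For concreteness, a hedged sketch of what the Batches schemas above imply on the wire. The per-line format of the uploaded input file follows the OpenAI batch convention and is an assumption here; only input_file_id, endpoint, completion_window, metadata, and idempotency_key come from CreateBatchRequest itself:

import json

# One line of the JSONL input file (format assumed from OpenAI conventions;
# custom_id is a hypothetical per-request identifier).
request_line = {
    "custom_id": "req-1",
    "method": "POST",
    "url": "/v1/chat/completions",
    "body": {"model": "my-model", "messages": [{"role": "user", "content": "hi"}]},
}
print(json.dumps(request_line))

# A CreateBatchRequest body using the idempotency_key extension: retrying
# with the same key should return the same batch rather than a new one.
create_batch_request = {
    "input_file_id": "file-abc123",    # hypothetical file ID
    "endpoint": "/v1/chat/completions",
    "completion_window": "24h",
    "metadata": {"job": "nightly-eval"},
    "idempotency_key": "nightly-eval-2025-01-01",
}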
@@ -5474,11 +5934,44 @@ components:
       oneOf:
         - $ref: '#/components/schemas/OpenAIResponseInputMessageContentText'
         - $ref: '#/components/schemas/OpenAIResponseInputMessageContentImage'
+        - $ref: '#/components/schemas/OpenAIResponseInputMessageContentFile'
       discriminator:
         propertyName: type
         mapping:
           input_text: '#/components/schemas/OpenAIResponseInputMessageContentText'
           input_image: '#/components/schemas/OpenAIResponseInputMessageContentImage'
+          input_file: '#/components/schemas/OpenAIResponseInputMessageContentFile'
+    OpenAIResponseInputMessageContentFile:
+      type: object
+      properties:
+        type:
+          type: string
+          const: input_file
+          default: input_file
+          description: >-
+            The type of the input item. Always `input_file`.
+        file_data:
+          type: string
+          description: >-
+            The data of the file to be sent to the model.
+        file_id:
+          type: string
+          description: >-
+            (Optional) The ID of the file to be sent to the model.
+        file_url:
+          type: string
+          description: >-
+            The URL of the file to be sent to the model.
+        filename:
+          type: string
+          description: >-
+            The name of the file to be sent to the model.
+      additionalProperties: false
+      required:
+        - type
+      title: OpenAIResponseInputMessageContentFile
+      description: >-
+        File content for input messages in OpenAI response format.
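A sketch of an input message using the new input_file content part defined above. The file ID and filename are hypothetical, and in practice only one of file_id, file_data, or file_url would typically be set:

# Hypothetical responses-API input message carrying a file part.
message = {
    "role": "user",
    "content": [
        {"type": "input_text", "text": "Summarize this document."},
        {"type": "input_file", "file_id": "file-abc123", "filename": "report.pdf"},
    ],
}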
     OpenAIResponseInputMessageContentImage:
       type: object
       properties:
@@ -5499,6 +5992,10 @@ components:
           default: input_image
           description: >-
             Content type identifier, always "input_image"
+        file_id:
+          type: string
+          description: >-
+            (Optional) The ID of the file to be sent to the model.
         image_url:
           type: string
           description: (Optional) URL of the image content
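With the file_id addition above, an image content part can reference an uploaded file instead of a URL. A hedged sketch with hypothetical values:

# By uploaded file ID (new optional field):
image_part = {"type": "input_image", "file_id": "file-img-123"}
# or, as before, by URL:
image_part = {"type": "input_image", "image_url": "https://example.com/cat.png"}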
@@ -6735,14 +7232,9 @@ components:
         Error details for failed OpenAI response requests.
     OpenAIResponseInput:
       oneOf:
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageWebSearchToolCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFileSearchToolCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageFunctionToolCall'
+        - $ref: '#/components/schemas/OpenAIResponseOutput'
         - $ref: '#/components/schemas/OpenAIResponseInputFunctionToolCallOutput'
-        - $ref: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
         - $ref: '#/components/schemas/OpenAIResponseMCPApprovalResponse'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
-        - $ref: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
         - $ref: '#/components/schemas/OpenAIResponseMessage'
     OpenAIResponseInputToolFileSearch:
       type: object
@@ -6898,6 +7390,10 @@ components:
           type: string
           description: >-
             (Optional) ID of the previous response in a conversation
+        prompt:
+          $ref: '#/components/schemas/OpenAIResponsePrompt'
+          description: >-
+            (Optional) Reference to a prompt template and its variables.
         status:
           type: string
           description: >-
@@ -6971,6 +7467,30 @@ components:
           mcp_call: '#/components/schemas/OpenAIResponseOutputMessageMCPCall'
           mcp_list_tools: '#/components/schemas/OpenAIResponseOutputMessageMCPListTools'
           mcp_approval_request: '#/components/schemas/OpenAIResponseMCPApprovalRequest'
+    OpenAIResponsePrompt:
+      type: object
+      properties:
+        id:
+          type: string
+          description: Unique identifier of the prompt template
+        variables:
+          type: object
+          additionalProperties:
+            $ref: '#/components/schemas/OpenAIResponseInputMessageContent'
+          description: >-
+            Dictionary of variable names to OpenAIResponseInputMessageContent structure
+            for template substitution. The substitution values can either be strings,
+            or other Response input types like images or files.
+        version:
+          type: string
+          description: >-
+            Version number of the prompt to use (defaults to latest if not specified)
+      additionalProperties: false
+      required:
+        - id
+      title: OpenAIResponsePrompt
+      description: >-
+        OpenAI compatible Prompt object that is used in OpenAI responses.
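A sketch of how a request might reference the OpenAIResponsePrompt object defined above. The prompt ID, version, and variable names are hypothetical; per the schema, variables may be plain strings or other input content parts such as files:

# Hypothetical response request body using a stored prompt template.
request = {
    "model": "my-model",
    "prompt": {
        "id": "prompt-123",
        "version": "2",   # optional; latest is used when omitted
        "variables": {
            "customer_name": "Ada",
            "attachment": {"type": "input_file", "file_id": "file-abc123"},
        },
    },
}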
     OpenAIResponseText:
       type: object
       properties:
@@ -7228,6 +7748,10 @@ components:
         model:
           type: string
           description: The underlying LLM used for completions.
+        prompt:
+          $ref: '#/components/schemas/OpenAIResponsePrompt'
+          description: >-
+            (Optional) Prompt object with ID, version, and variables.
         instructions:
           type: string
         previous_response_id:
@@ -7305,6 +7829,10 @@ components:
           type: string
           description: >-
             (Optional) ID of the previous response in a conversation
+        prompt:
+          $ref: '#/components/schemas/OpenAIResponsePrompt'
+          description: >-
+            (Optional) Reference to a prompt template and its variables.
         status:
           type: string
           description: >-
@@ -9867,7 +10395,7 @@ components:
             $ref: '#/components/schemas/RAGDocument'
           description: >-
             List of documents to index in the RAG system
-        vector_db_id:
+        vector_store_id:
           type: string
           description: >-
            ID of the vector database to store the document embeddings
@@ -9878,7 +10406,7 @@ components:
       additionalProperties: false
       required:
         - documents
-        - vector_db_id
+        - vector_store_id
         - chunk_size_in_tokens
       title: InsertRequest
     DefaultRAGQueryGeneratorConfig:
@@ -10049,7 +10577,7 @@ components:
           $ref: '#/components/schemas/InterleavedContent'
           description: >-
             The query content to search for in the indexed documents
-        vector_db_ids:
+        vector_store_ids:
           type: array
           items:
             type: string
@@ -10062,7 +10590,7 @@ components:
       additionalProperties: false
       required:
         - content
-        - vector_db_ids
+        - vector_store_ids
       title: QueryRequest
     RAGQueryResult:
       type: object
@@ -10190,6 +10718,10 @@ components:
           description: >-
             The content of the chunk, which can be interleaved text, images, or other
             types.
+        chunk_id:
+          type: string
+          description: >-
+            Unique identifier for the chunk. Must be provided explicitly.
         metadata:
           type: object
           additionalProperties:
@@ -10210,10 +10742,6 @@ components:
           description: >-
             Optional embedding for the chunk. If not provided, it will be computed
             later.
-        stored_chunk_id:
-          type: string
-          description: >-
-            The chunk ID that is stored in the vector database. Used for backend functionality.
         chunk_metadata:
           $ref: '#/components/schemas/ChunkMetadata'
           description: >-
@@ -10222,6 +10750,7 @@ components:
       additionalProperties: false
       required:
         - content
+        - chunk_id
         - metadata
       title: Chunk
       description: >-
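Since chunk_id is now a required, caller-supplied field (and the server-side stored_chunk_id is gone, per the hunks above), clients must mint stable IDs themselves. A minimal sketch; the hashing scheme is one reasonable choice, not something this spec mandates:

import hashlib

def make_chunk(document_id: str, index: int, text: str) -> dict:
    # Derive a stable, deterministic ID from the document and position so
    # re-indexing the same content produces the same chunk_id.
    digest = hashlib.sha256(f"{document_id}:{index}".encode()).hexdigest()[:16]
    return {
        "chunk_id": digest,   # now required by the Chunk schema
        "content": text,
        "metadata": {"document_id": document_id, "chunk_index": index},
    }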
@@ -10286,7 +10815,7 @@ components:
     InsertChunksRequest:
       type: object
       properties:
-        vector_db_id:
+        vector_store_id:
           type: string
           description: >-
             The identifier of the vector database to insert the chunks into.
@@ -10305,13 +10834,13 @@ components:
           description: The time to live of the chunks.
       additionalProperties: false
       required:
-        - vector_db_id
+        - vector_store_id
         - chunks
       title: InsertChunksRequest
     QueryChunksRequest:
       type: object
       properties:
-        vector_db_id:
+        vector_store_id:
           type: string
           description: >-
             The identifier of the vector database to query.
@@ -10331,7 +10860,7 @@ components:
           description: The parameters of the query.
       additionalProperties: false
       required:
-        - vector_db_id
+        - vector_store_id
         - query
       title: QueryChunksRequest
     QueryChunksResponse:
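The vector_db_id to vector_store_id rename in both request schemas, shown as payloads; the store ID and chunk values are hypothetical:

# InsertChunksRequest after the rename.
insert_chunks_request = {
    "vector_store_id": "vs_123",   # field was previously vector_db_id
    "chunks": [
        {"chunk_id": "doc-1-0", "content": "hello world",
         "metadata": {"document_id": "doc-1"}},
    ],
}

# QueryChunksRequest after the rename.
query_chunks_request = {
    "vector_store_id": "vs_123",   # field was previously vector_db_id
    "query": "hello",
}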
@@ -11600,7 +12129,6 @@ components:
           description: The sampling strategy.
         max_tokens:
           type: integer
-          default: 0
           description: >-
             The maximum number of tokens that can be generated in the completion.
             The token count of your prompt plus max_tokens cannot exceed the model's
@@ -11850,7 +12378,7 @@ components:
           description: Type of the step in an agent turn.
           const: memory_retrieval
           default: memory_retrieval
-        vector_db_ids:
+        vector_store_ids:
           type: string
           description: >-
             The IDs of the vector databases to retrieve context from.
@@ -11863,7 +12391,7 @@ components:
         - turn_id
         - step_id
         - step_type
-        - vector_db_ids
+        - vector_store_ids
         - inserted_context
       title: MemoryRetrievalStep
       description: >-
@@ -13460,6 +13988,19 @@ tags:
     description: >-
       APIs for creating and interacting with agentic systems.
     x-displayName: Agents
+  - name: Batches
+    description: >-
+      The API is designed to allow use of openai client libraries for seamless integration.
+
+      This API provides the following extensions:
+       - idempotent batch creation
+
+      Note: This API is currently under active development and may undergo changes.
+    x-displayName: >-
+      The Batches API enables efficient processing of multiple requests in a single
+      operation, particularly useful for processing large datasets, batch evaluation
+      workflows, and cost-effective inference at scale.
   - name: Benchmarks
     description: ''
   - name: Conversations
@@ -13534,6 +14075,7 @@ x-tagGroups:
   - name: Operations
     tags:
       - Agents
+      - Batches
       - Benchmarks
       - Conversations
       - DatasetIO
@@ -1,7 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from .telemetry import *
@@ -1,250 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-import os
-import threading
-from typing import Any
-
-from opentelemetry import metrics, trace
-from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
-from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
-from opentelemetry.sdk.metrics import MeterProvider
-from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
-from opentelemetry.sdk.trace import TracerProvider
-from opentelemetry.sdk.trace.export import BatchSpanProcessor
-from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator
-
-from llama_stack.apis.telemetry import (
-    Event,
-    MetricEvent,
-    SpanEndPayload,
-    SpanStartPayload,
-    SpanStatus,
-    StructuredLogEvent,
-    UnstructuredLogEvent,
-)
-from llama_stack.apis.telemetry import (
-    Telemetry as TelemetryBase,
-)
-from llama_stack.core.telemetry.tracing import ROOT_SPAN_MARKERS
-from llama_stack.log import get_logger
-
-_GLOBAL_STORAGE: dict[str, dict[str | int, Any]] = {
-    "active_spans": {},
-    "counters": {},
-    "gauges": {},
-    "up_down_counters": {},
-}
-_global_lock = threading.Lock()
-_TRACER_PROVIDER = None
-
-logger = get_logger(name=__name__, category="telemetry")
-
-
-def is_tracing_enabled(tracer):
-    with tracer.start_as_current_span("check_tracing") as span:
-        return span.is_recording()
-
-
-class Telemetry(TelemetryBase):
-    def __init__(self) -> None:
-        self.meter = None
-
-        global _TRACER_PROVIDER
-        # Initialize the correct span processor based on the provider state.
-        # This is needed since once the span processor is set, it cannot be unset.
-        # Recreating the telemetry adapter multiple times will result in duplicate span processors.
-        # Since the library client can be recreated multiple times in a notebook,
-        # the kernel will hold on to the span processor and cause duplicate spans to be written.
-        if os.environ.get("OTEL_EXPORTER_OTLP_ENDPOINT"):
-            if _TRACER_PROVIDER is None:
-                provider = TracerProvider()
-                trace.set_tracer_provider(provider)
-                _TRACER_PROVIDER = provider
-
-                # Use single OTLP endpoint for all telemetry signals
-
-                # Let OpenTelemetry SDK handle endpoint construction automatically
-                # The SDK will read OTEL_EXPORTER_OTLP_ENDPOINT and construct appropriate URLs
-                # https://opentelemetry.io/docs/languages/sdk-configuration/otlp-exporter
-                span_exporter = OTLPSpanExporter()
-                span_processor = BatchSpanProcessor(span_exporter)
-                trace.get_tracer_provider().add_span_processor(span_processor)
-
-                metric_reader = PeriodicExportingMetricReader(OTLPMetricExporter())
-                metric_provider = MeterProvider(metric_readers=[metric_reader])
-                metrics.set_meter_provider(metric_provider)
-            self.is_otel_endpoint_set = True
-        else:
-            logger.warning("OTEL_EXPORTER_OTLP_ENDPOINT is not set, skipping telemetry")
-            self.is_otel_endpoint_set = False
-
-        self.meter = metrics.get_meter(__name__)
-        self._lock = _global_lock
-
-    async def initialize(self) -> None:
-        pass
-
-    async def shutdown(self) -> None:
-        if self.is_otel_endpoint_set:
-            trace.get_tracer_provider().force_flush()
-
-    async def log_event(self, event: Event, ttl_seconds: int = 604800) -> None:
-        if isinstance(event, UnstructuredLogEvent):
-            self._log_unstructured(event, ttl_seconds)
-        elif isinstance(event, MetricEvent):
-            self._log_metric(event)
-        elif isinstance(event, StructuredLogEvent):
-            self._log_structured(event, ttl_seconds)
-        else:
-            raise ValueError(f"Unknown event type: {event}")
-
-    def _log_unstructured(self, event: UnstructuredLogEvent, ttl_seconds: int) -> None:
-        with self._lock:
-            # Use global storage instead of instance storage
-            span_id = int(event.span_id, 16)
-            span = _GLOBAL_STORAGE["active_spans"].get(span_id)
-
-            if span:
-                timestamp_ns = int(event.timestamp.timestamp() * 1e9)
-                span.add_event(
-                    name=event.type.value,
-                    attributes={
-                        "message": event.message,
-                        "severity": event.severity.value,
-                        "__ttl__": ttl_seconds,
-                        **(event.attributes or {}),
-                    },
-                    timestamp=timestamp_ns,
-                )
-            else:
-                print(f"Warning: No active span found for span_id {span_id}. Dropping event: {event}")
-
-    def _get_or_create_counter(self, name: str, unit: str) -> metrics.Counter:
-        assert self.meter is not None
-        if name not in _GLOBAL_STORAGE["counters"]:
-            _GLOBAL_STORAGE["counters"][name] = self.meter.create_counter(
-                name=name,
-                unit=unit,
-                description=f"Counter for {name}",
-            )
-        return _GLOBAL_STORAGE["counters"][name]
-
-    def _get_or_create_gauge(self, name: str, unit: str) -> metrics.ObservableGauge:
-        assert self.meter is not None
-        if name not in _GLOBAL_STORAGE["gauges"]:
-            _GLOBAL_STORAGE["gauges"][name] = self.meter.create_gauge(
-                name=name,
-                unit=unit,
-                description=f"Gauge for {name}",
-            )
-        return _GLOBAL_STORAGE["gauges"][name]
-
-    def _log_metric(self, event: MetricEvent) -> None:
-        # Add metric as an event to the current span
-        try:
-            with self._lock:
-                # Only try to add to span if we have a valid span_id
-                if event.span_id:
-                    try:
-                        span_id = int(event.span_id, 16)
-                        span = _GLOBAL_STORAGE["active_spans"].get(span_id)
-
-                        if span:
-                            timestamp_ns = int(event.timestamp.timestamp() * 1e9)
-                            span.add_event(
-                                name=f"metric.{event.metric}",
-                                attributes={
-                                    "value": event.value,
-                                    "unit": event.unit,
-                                    **(event.attributes or {}),
-                                },
-                                timestamp=timestamp_ns,
-                            )
-                    except (ValueError, KeyError):
-                        # Invalid span_id or span not found, but we already logged to console above
-                        pass
-        except Exception:
-            # Lock acquisition failed
-            logger.debug("Failed to acquire lock to add metric to span")
-
-        # Log to OpenTelemetry meter if available
-        if self.meter is None:
-            return
-        if isinstance(event.value, int):
-            counter = self._get_or_create_counter(event.metric, event.unit)
-            counter.add(event.value, attributes=event.attributes)
-        elif isinstance(event.value, float):
-            up_down_counter = self._get_or_create_up_down_counter(event.metric, event.unit)
-            up_down_counter.add(event.value, attributes=event.attributes)
-
-    def _get_or_create_up_down_counter(self, name: str, unit: str) -> metrics.UpDownCounter:
-        assert self.meter is not None
-        if name not in _GLOBAL_STORAGE["up_down_counters"]:
-            _GLOBAL_STORAGE["up_down_counters"][name] = self.meter.create_up_down_counter(
-                name=name,
-                unit=unit,
-                description=f"UpDownCounter for {name}",
-            )
-        return _GLOBAL_STORAGE["up_down_counters"][name]
-
-    def _log_structured(self, event: StructuredLogEvent, ttl_seconds: int) -> None:
-        with self._lock:
-            span_id = int(event.span_id, 16)
-            tracer = trace.get_tracer(__name__)
-            if event.attributes is None:
-                event.attributes = {}
-            event.attributes["__ttl__"] = ttl_seconds
-
-            # Extract these W3C trace context attributes so they are not written to
-            # underlying storage, as we just need them to propagate the trace context.
-            traceparent = event.attributes.pop("traceparent", None)
-            tracestate = event.attributes.pop("tracestate", None)
-            if traceparent:
-                # If we have a traceparent header value, we're not the root span.
-                for root_attribute in ROOT_SPAN_MARKERS:
-                    event.attributes.pop(root_attribute, None)
-
-            if isinstance(event.payload, SpanStartPayload):
-                # Check if span already exists to prevent duplicates
-                if span_id in _GLOBAL_STORAGE["active_spans"]:
-                    return
-
-                context = None
-                if event.payload.parent_span_id:
-                    parent_span_id = int(event.payload.parent_span_id, 16)
-                    parent_span = _GLOBAL_STORAGE["active_spans"].get(parent_span_id)
-                    context = trace.set_span_in_context(parent_span)
-                elif traceparent:
-                    carrier = {
-                        "traceparent": traceparent,
-                        "tracestate": tracestate,
-                    }
-                    context = TraceContextTextMapPropagator().extract(carrier=carrier)
-
-                span = tracer.start_span(
-                    name=event.payload.name,
-                    context=context,
-                    attributes=event.attributes or {},
-                )
-                _GLOBAL_STORAGE["active_spans"][span_id] = span
-
-            elif isinstance(event.payload, SpanEndPayload):
-                span = _GLOBAL_STORAGE["active_spans"].get(span_id)
-                if span:
-                    if event.attributes:
-                        span.set_attributes(event.attributes)
-
-                    status = (
-                        trace.Status(status_code=trace.StatusCode.OK)
-                        if event.payload.status == SpanStatus.OK
-                        else trace.Status(status_code=trace.StatusCode.ERROR)
-                    )
-                    span.set_status(status)
-                    span.end()
-                    _GLOBAL_STORAGE["active_spans"].pop(span_id, None)
-            else:
-                raise ValueError(f"Unknown structured log event: {event}")
@@ -1,40 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-from collections.abc import AsyncGenerator
-from contextvars import ContextVar
-
-
-def preserve_contexts_async_generator[T](
-    gen: AsyncGenerator[T, None], context_vars: list[ContextVar]
-) -> AsyncGenerator[T, None]:
-    """
-    Wraps an async generator to preserve context variables across iterations.
-    This is needed because we start a new asyncio event loop for each streaming request,
-    and we need to preserve the context across the event loop boundary.
-    """
-    # Capture initial context values
-    initial_context_values = {context_var.name: context_var.get() for context_var in context_vars}
-
-    async def wrapper() -> AsyncGenerator[T, None]:
-        while True:
-            try:
-                # Restore context values before any await
-                for context_var in context_vars:
-                    context_var.set(initial_context_values[context_var.name])
-
-                item = await gen.__anext__()
-
-                # Update our tracked values with any changes made during this iteration
-                for context_var in context_vars:
-                    initial_context_values[context_var.name] = context_var.get()
-
-                yield item
-
-            except StopAsyncIteration:
-                break
-
-    return wrapper()
@@ -1,61 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# All rights reserved.
-#
-# This source code is licensed under the terms described in the LICENSE file in
-# the root directory of this source tree.
-
-
-from llama_stack.log import get_logger
-from llama_stack.providers.utils.inference.openai_mixin import OpenAIMixin
-
-from . import NVIDIAConfig
-from .utils import _is_nvidia_hosted
-
-logger = get_logger(name=__name__, category="inference::nvidia")
-
-
-class NVIDIAInferenceAdapter(OpenAIMixin):
-    config: NVIDIAConfig
-
-    """
-    NVIDIA Inference Adapter for Llama Stack.
-    """
-
-    # source: https://docs.nvidia.com/nim/nemo-retriever/text-embedding/latest/support-matrix.html
-    embedding_model_metadata: dict[str, dict[str, int]] = {
-        "nvidia/llama-3.2-nv-embedqa-1b-v2": {"embedding_dimension": 2048, "context_length": 8192},
-        "nvidia/nv-embedqa-e5-v5": {"embedding_dimension": 512, "context_length": 1024},
-        "nvidia/nv-embedqa-mistral-7b-v2": {"embedding_dimension": 512, "context_length": 4096},
-        "snowflake/arctic-embed-l": {"embedding_dimension": 512, "context_length": 1024},
-    }
-
-    async def initialize(self) -> None:
-        logger.info(f"Initializing NVIDIAInferenceAdapter({self.config.url})...")
-
-        if _is_nvidia_hosted(self.config):
-            if not self.config.auth_credential:
-                raise RuntimeError(
-                    "API key is required for hosted NVIDIA NIM. Either provide an API key or use a self-hosted NIM."
-                )
-
-    def get_api_key(self) -> str:
-        """
-        Get the API key for OpenAI mixin.
-
-        :return: The NVIDIA API key
-        """
-        if self.config.auth_credential:
-            return self.config.auth_credential.get_secret_value()
-
-        if not _is_nvidia_hosted(self.config):
-            return "NO KEY REQUIRED"
-
-        return None
-
-    def get_base_url(self) -> str:
-        """
-        Get the base URL for OpenAI mixin.
-
-        :return: The NVIDIA API base URL
-        """
-        return f"{self.config.url}/v1" if self.config.append_api_version else self.config.url
204 pyproject.toml
@@ -31,7 +31,7 @@ dependencies = [
     "jinja2>=3.1.6",
     "jsonschema",
     "llama-stack-client>=0.3.0",
-    "openai>=1.107", # for expires_after support
+    "openai>=2.5.0",
     "prompt-toolkit",
     "python-dotenv",
     "pyjwt[crypto]>=2.10.0", # Pull crypto to support RS256 for jwt. Requires 2.10.0+ for ssl_context support.
@@ -67,17 +67,48 @@ dev = [
     "pytest-cov",
     "pytest-html",
     "pytest-json-report",
     "pytest-socket", # For blocking network access in unit tests
     "nbval", # For notebook testing
     "black",
     "ruff",
+    "mypy",
+    "pre-commit",
+    "ruamel.yaml", # needed for openapi generator
+]
+# Type checking dependencies - includes type stubs and optional runtime dependencies
+# needed for complete mypy coverage across all optional features
+type_checking = [
     "types-requests",
     "types-setuptools",
-    "pre-commit",
-    "ruamel.yaml", # needed for openapi generator
+    "types-jsonschema",
+    "pandas-stubs",
+    "types-psutil",
+    "types-tqdm",
+    "boto3-stubs[s3]",
+    "streamlit",
+    "streamlit-option-menu",
+    "pandas",
+    "anthropic",
+    "databricks-sdk",
+    "fairscale",
+    "torchtune",
+    "trl",
+    "peft",
+    "datasets",
+    "together",
+    "nest-asyncio",
+    "pymongo",
+    "torchvision",
+    "sqlite-vec",
+    "faiss-cpu",
+    "lm-format-enforcer",
+    "mcp",
+    "ollama",
 ]
 # These are the dependencies required for running unit tests.
 unit = [
+    "anthropic",
+    "databricks-sdk",
     "sqlite-vec",
     "ollama",
     "aiosqlite",
@@ -151,7 +182,7 @@ llama = "llama_stack.cli.llama:main"
 install-wheel-from-presigned = "llama_stack.cli.scripts.run:install_wheel_from_presigned"

 [tool.setuptools.packages.find]
-where = ["."]
+where = ["src"]
 include = ["llama_stack", "llama_stack.*"]

 [[tool.uv.index]]
@@ -218,17 +249,17 @@ unfixable = [
 # Ignore the following errors for the following files
 [tool.ruff.lint.per-file-ignores]
 "tests/**/*.py" = ["DTZ"] # Ignore datetime rules for tests
-"llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py" = ["RUF001"]
-"llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py" = [
+"src/llama_stack/providers/inline/scoring/basic/utils/ifeval_utils.py" = ["RUF001"]
+"src/llama_stack/providers/inline/scoring/basic/scoring_fn/fn_defs/regex_parser_multiple_choice_answer.py" = [
     "RUF001",
     "PLE2515",
 ]
-"llama_stack/apis/**/__init__.py" = [
+"src/llama_stack/apis/**/__init__.py" = [
     "F403",
 ] # Using import * is acceptable (or at least tolerated) in an __init__.py of a package API

 [tool.mypy]
-mypy_path = ["llama_stack"]
+mypy_path = ["src"]
 packages = ["llama_stack"]
 plugins = ['pydantic.mypy']
 disable_error_code = []
@@ -240,82 +271,91 @@ follow_imports = "silent"
 # to exclude the entire directory.
 exclude = [
     # As we fix more and more of these, we should remove them from the list
-    "^llama_stack.core/build\\.py$",
-    "^llama_stack.core/client\\.py$",
-    "^llama_stack.core/request_headers\\.py$",
-    "^llama_stack.core/routers/",
-    "^llama_stack.core/routing_tables/",
-    "^llama_stack.core/server/endpoints\\.py$",
-    "^llama_stack.core/server/server\\.py$",
-    "^llama_stack.core/stack\\.py$",
-    "^llama_stack.core/store/registry\\.py$",
-    "^llama_stack.core/utils/exec\\.py$",
-    "^llama_stack.core/utils/prompt_for_config\\.py$",
-    "^llama_stack/models/llama/llama3/interface\\.py$",
-    "^llama_stack/models/llama/llama3/tokenizer\\.py$",
-    "^llama_stack/models/llama/llama3/tool_utils\\.py$",
-    "^llama_stack/providers/inline/agents/meta_reference/",
-    "^llama_stack/providers/inline/datasetio/localfs/",
-    "^llama_stack/providers/inline/eval/meta_reference/eval\\.py$",
-    "^llama_stack/providers/inline/inference/meta_reference/inference\\.py$",
-    "^llama_stack/models/llama/llama3/generation\\.py$",
-    "^llama_stack/models/llama/llama3/multimodal/model\\.py$",
-    "^llama_stack/models/llama/llama4/",
-    "^llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers\\.py$",
-    "^llama_stack/providers/inline/post_training/common/validator\\.py$",
-    "^llama_stack/providers/inline/safety/code_scanner/",
-    "^llama_stack/providers/inline/safety/llama_guard/",
-    "^llama_stack/providers/inline/scoring/basic/",
-    "^llama_stack/providers/inline/scoring/braintrust/",
-    "^llama_stack/providers/inline/scoring/llm_as_judge/",
-    "^llama_stack/providers/remote/agents/sample/",
-    "^llama_stack/providers/remote/datasetio/huggingface/",
-    "^llama_stack/providers/remote/datasetio/nvidia/",
-    "^llama_stack/providers/remote/inference/bedrock/",
-    "^llama_stack/providers/remote/inference/nvidia/",
-    "^llama_stack/providers/remote/inference/passthrough/",
-    "^llama_stack/providers/remote/inference/runpod/",
-    "^llama_stack/providers/remote/inference/tgi/",
-    "^llama_stack/providers/remote/inference/watsonx/",
-    "^llama_stack/providers/remote/safety/bedrock/",
-    "^llama_stack/providers/remote/safety/nvidia/",
-    "^llama_stack/providers/remote/safety/sambanova/",
-    "^llama_stack/providers/remote/safety/sample/",
-    "^llama_stack/providers/remote/tool_runtime/bing_search/",
-    "^llama_stack/providers/remote/tool_runtime/brave_search/",
-    "^llama_stack/providers/remote/tool_runtime/model_context_protocol/",
-    "^llama_stack/providers/remote/tool_runtime/tavily_search/",
-    "^llama_stack/providers/remote/tool_runtime/wolfram_alpha/",
-    "^llama_stack/providers/remote/post_training/nvidia/",
-    "^llama_stack/providers/remote/vector_io/chroma/",
-    "^llama_stack/providers/remote/vector_io/milvus/",
-    "^llama_stack/providers/remote/vector_io/pgvector/",
-    "^llama_stack/providers/remote/vector_io/qdrant/",
-    "^llama_stack/providers/remote/vector_io/sample/",
-    "^llama_stack/providers/remote/vector_io/weaviate/",
-    "^llama_stack/providers/utils/bedrock/client\\.py$",
-    "^llama_stack/providers/utils/bedrock/refreshable_boto_session\\.py$",
-    "^llama_stack/providers/utils/inference/embedding_mixin\\.py$",
-    "^llama_stack/providers/utils/inference/litellm_openai_mixin\\.py$",
-    "^llama_stack/providers/utils/inference/model_registry\\.py$",
-    "^llama_stack/providers/utils/inference/openai_compat\\.py$",
-    "^llama_stack/providers/utils/inference/prompt_adapter\\.py$",
-    "^llama_stack/providers/utils/kvstore/kvstore\\.py$",
-    "^llama_stack/providers/utils/kvstore/postgres/postgres\\.py$",
-    "^llama_stack/providers/utils/kvstore/redis/redis\\.py$",
-    "^llama_stack/providers/utils/memory/vector_store\\.py$",
-    "^llama_stack/providers/utils/scoring/aggregation_utils\\.py$",
-    "^llama_stack/providers/utils/scoring/base_scoring_fn\\.py$",
-    "^llama_stack/providers/utils/telemetry/dataset_mixin\\.py$",
-    "^llama_stack/providers/utils/telemetry/trace_protocol\\.py$",
-    "^llama_stack/providers/utils/telemetry/tracing\\.py$",
-    "^llama_stack/strong_typing/auxiliary\\.py$",
-    "^llama_stack/distributions/template\\.py$",
+    "^src/llama_stack/core/build\\.py$",
+    "^src/llama_stack/core/client\\.py$",
+    "^src/llama_stack/core/request_headers\\.py$",
+    "^src/llama_stack/core/routers/",
+    "^src/llama_stack/core/routing_tables/",
+    "^src/llama_stack/core/server/endpoints\\.py$",
+    "^src/llama_stack/core/server/server\\.py$",
+    "^src/llama_stack/core/stack\\.py$",
+    "^src/llama_stack/core/store/registry\\.py$",
+    "^src/llama_stack/core/utils/exec\\.py$",
+    "^src/llama_stack/core/utils/prompt_for_config\\.py$",
+    "^src/llama_stack/models/llama/llama3/interface\\.py$",
+    "^src/llama_stack/models/llama/llama3/tokenizer\\.py$",
+    "^src/llama_stack/models/llama/llama3/tool_utils\\.py$",
+    "^src/llama_stack/providers/inline/datasetio/localfs/",
+    "^src/llama_stack/providers/inline/eval/meta_reference/eval\\.py$",
+    "^src/llama_stack/providers/inline/inference/meta_reference/inference\\.py$",
+    "^src/llama_stack/models/llama/llama3/generation\\.py$",
+    "^src/llama_stack/models/llama/llama3/multimodal/model\\.py$",
+    "^src/llama_stack/models/llama/llama4/",
+    "^src/llama_stack/providers/inline/inference/sentence_transformers/sentence_transformers\\.py$",
+    "^src/llama_stack/providers/inline/post_training/common/validator\\.py$",
+    "^src/llama_stack/providers/inline/safety/code_scanner/",
+    "^src/llama_stack/providers/inline/safety/llama_guard/",
+    "^src/llama_stack/providers/inline/scoring/basic/",
+    "^src/llama_stack/providers/inline/scoring/braintrust/",
+    "^src/llama_stack/providers/inline/scoring/llm_as_judge/",
+    "^src/llama_stack/providers/remote/agents/sample/",
+    "^src/llama_stack/providers/remote/datasetio/huggingface/",
+    "^src/llama_stack/providers/remote/datasetio/nvidia/",
+    "^src/llama_stack/providers/remote/inference/bedrock/",
+    "^src/llama_stack/providers/remote/inference/nvidia/",
+    "^src/llama_stack/providers/remote/inference/passthrough/",
+    "^src/llama_stack/providers/remote/inference/runpod/",
+    "^src/llama_stack/providers/remote/inference/tgi/",
+    "^src/llama_stack/providers/remote/inference/watsonx/",
+    "^src/llama_stack/providers/remote/safety/bedrock/",
+    "^src/llama_stack/providers/remote/safety/nvidia/",
+    "^src/llama_stack/providers/remote/safety/sambanova/",
+    "^src/llama_stack/providers/remote/safety/sample/",
+    "^src/llama_stack/providers/remote/tool_runtime/bing_search/",
+    "^src/llama_stack/providers/remote/tool_runtime/brave_search/",
+    "^src/llama_stack/providers/remote/tool_runtime/model_context_protocol/",
+    "^src/llama_stack/providers/remote/tool_runtime/tavily_search/",
+    "^src/llama_stack/providers/remote/tool_runtime/wolfram_alpha/",
+    "^src/llama_stack/providers/remote/post_training/nvidia/",
+    "^src/llama_stack/providers/remote/vector_io/chroma/",
+    "^src/llama_stack/providers/remote/vector_io/milvus/",
+    "^src/llama_stack/providers/remote/vector_io/pgvector/",
+    "^src/llama_stack/providers/remote/vector_io/qdrant/",
+    "^src/llama_stack/providers/remote/vector_io/sample/",
+    "^src/llama_stack/providers/remote/vector_io/weaviate/",
+    "^src/llama_stack/providers/utils/bedrock/client\\.py$",
+    "^src/llama_stack/providers/utils/bedrock/refreshable_boto_session\\.py$",
+    "^src/llama_stack/providers/utils/inference/embedding_mixin\\.py$",
+    "^src/llama_stack/providers/utils/inference/litellm_openai_mixin\\.py$",
+    "^src/llama_stack/providers/utils/inference/model_registry\\.py$",
+    "^src/llama_stack/providers/utils/inference/openai_compat\\.py$",
+    "^src/llama_stack/providers/utils/inference/prompt_adapter\\.py$",
+    "^src/llama_stack/providers/utils/kvstore/kvstore\\.py$",
+    "^src/llama_stack/providers/utils/kvstore/postgres/postgres\\.py$",
+    "^src/llama_stack/providers/utils/kvstore/redis/redis\\.py$",
+    "^src/llama_stack/providers/utils/memory/vector_store\\.py$",
+    "^src/llama_stack/providers/utils/scoring/aggregation_utils\\.py$",
+    "^src/llama_stack/providers/utils/scoring/base_scoring_fn\\.py$",
+    "^src/llama_stack/providers/utils/telemetry/dataset_mixin\\.py$",
+    "^src/llama_stack/providers/utils/telemetry/trace_protocol\\.py$",
+    "^src/llama_stack/providers/utils/telemetry/tracing\\.py$",
+    "^src/llama_stack/strong_typing/auxiliary\\.py$",
+    "^src/llama_stack/distributions/template\\.py$",
 ]

 [[tool.mypy.overrides]]
 # packages that lack typing annotations, do not have stubs, or are unavailable.
-module = ["yaml", "fire"]
+module = [
+    "yaml",
+    "fire",
+    "torchtune.*",
+    "fairscale.*",
+    "torchvision.*",
+    "datasets",
+    "nest_asyncio",
+    "streamlit_option_menu",
+    "lmformatenforcer.*",
+]
 ignore_missing_imports = true

 [tool.pydantic-mypy]
@@ -16,7 +16,7 @@ if (( BASH_VERSINFO[0] < 4 )); then
   exit 1
 fi

-PACKAGE_DIR="${1:-llama_stack}"
+PACKAGE_DIR="${1:-src/llama_stack}"

 if [ ! -d "$PACKAGE_DIR" ]; then
   echo "ERROR: Package directory '$PACKAGE_DIR' does not exist"
@@ -55,7 +55,7 @@ def process_distro(distro_dir: Path, progress, change_tracker: ChangedPathTracke
     if template_func := getattr(module, "get_distribution_template", None):
         distro = template_func()

-        yaml_output_dir = REPO_ROOT / "llama_stack" / "distributions" / distro.name
+        yaml_output_dir = REPO_ROOT / "src" / "llama_stack" / "distributions" / distro.name
         doc_output_dir = REPO_ROOT / "docs/docs/distributions" / f"{distro.distro_type}_distro"
         change_tracker.add_paths(yaml_output_dir, doc_output_dir)
         distro.save_distribution(
@@ -93,7 +93,7 @@ def pre_import_distros(distro_dirs: list[Path]) -> None:


 def main():
-    distros_dir = REPO_ROOT / "llama_stack" / "distributions"
+    distros_dir = REPO_ROOT / "src" / "llama_stack" / "distributions"
     change_tracker = ChangedPathTracker()

     with Progress(
@@ -30,8 +30,10 @@ materialize_telemetry_configs() {
   local otel_cfg="${dest}/otel-collector-config.yaml"
   local prom_cfg="${dest}/prometheus.yml"
   local graf_cfg="${dest}/grafana-datasources.yaml"
+  local graf_dash_cfg="${dest}/grafana-dashboards.yaml"
+  local dash_json="${dest}/llama-stack-dashboard.json"

-  for asset in "$otel_cfg" "$prom_cfg" "$graf_cfg"; do
+  for asset in "$otel_cfg" "$prom_cfg" "$graf_cfg" "$graf_dash_cfg" "$dash_json"; do
     if [ -e "$asset" ]; then
       die "Telemetry asset ${asset} already exists; refusing to overwrite"
     fi
@@ -103,6 +105,7 @@ datasources:
     type: prometheus
     access: proxy
     url: http://prometheus:9090
+    uid: prometheus
     isDefault: true
     editable: true
@@ -112,6 +115,224 @@ datasources:
     url: http://jaeger:16686
     editable: true
 EOF
+
+  cat <<'EOF' > "$graf_dash_cfg"
+apiVersion: 1
+
+providers:
+  - name: 'Llama Stack'
+    orgId: 1
+    folder: ''
+    type: file
+    disableDeletion: false
+    updateIntervalSeconds: 10
+    allowUiUpdates: true
+    options:
+      path: /etc/grafana/provisioning/dashboards
+EOF
+
+  # Copy the dashboard JSON inline to avoid line-length issues
+  cat > "$dash_json" <<'DASHBOARD_JSON'
+{
+  "annotations": {
+    "list": []
+  },
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 0,
+  "id": null,
+  "links": [],
+  "liveNow": false,
+  "panels": [
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "custom": {
+            "drawStyle": "line",
+            "lineInterpolation": "linear",
+            "showPoints": "auto",
+            "fillOpacity": 10
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [{"color": "green", "value": null}]
+          }
+        }
+      },
+      "gridPos": {"h": 8, "w": 12, "x": 0, "y": 0},
+      "id": 1,
+      "options": {
+        "legend": {"calcs": [], "displayMode": "table", "placement": "bottom", "showLegend": true},
+        "tooltip": {"mode": "multi", "sort": "none"}
+      },
+      "targets": [
+        {
+          "datasource": {"type": "prometheus", "uid": "prometheus"},
+          "expr": "llama_stack_completion_tokens_total",
+          "legendFormat": "{{model_id}} ({{provider_id}})",
+          "refId": "A"
+        }
+      ],
+      "title": "Completion Tokens",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"},
+      "fieldConfig": {
+        "defaults": {
+          "custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10},
+          "mappings": [],
+          "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}
+        }
+      },
+      "gridPos": {"h": 8, "w": 12, "x": 12, "y": 0},
+      "id": 2,
+      "options": {
+        "legend": {"calcs": [], "displayMode": "table", "placement": "bottom", "showLegend": true},
+        "tooltip": {"mode": "multi", "sort": "none"}
+      },
+      "targets": [
+        {"datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, "expr": "llama_stack_prompt_tokens_total", "legendFormat": "Prompt - {{model_id}}", "refId": "A"},
+        {"datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, "expr": "llama_stack_tokens_total", "legendFormat": "Total - {{model_id}}", "refId": "B"}
+      ],
+      "title": "Prompt & Total Tokens",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"},
+      "fieldConfig": {
+        "defaults": {
+          "custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10},
+          "mappings": [],
+          "thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
+          "unit": "ms"
+        }
+      },
+      "gridPos": {"h": 8, "w": 12, "x": 0, "y": 8},
+      "id": 3,
+      "options": {
+        "legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true},
+        "tooltip": {"mode": "multi", "sort": "none"}
+      },
+      "targets": [
+        {"datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, "expr": "histogram_quantile(0.95, rate(llama_stack_http_server_duration_milliseconds_bucket[5m]))", "legendFormat": "p95", "refId": "A"},
+        {"datasource": {"type": "prometheus", "uid": "${DS_PROMETHEUS}"}, "expr": "histogram_quantile(0.99, rate(llama_stack_http_server_duration_milliseconds_bucket[5m]))", "legendFormat": "p99", "refId": "B"}
{"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "histogram_quantile(0.99, rate(llama_stack_http_server_duration_milliseconds_bucket[5m]))", "legendFormat": "p99", "refId": "B"}
|
||||||
|
],
|
||||||
|
"title": "HTTP Request Duration (p95, p99)",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"mappings": [],
|
||||||
|
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": {"h": 8, "w": 6, "x": 12, "y": 8},
|
||||||
|
"id": 4,
|
||||||
|
"options": {
|
||||||
|
"colorMode": "value",
|
||||||
|
"graphMode": "area",
|
||||||
|
"justifyMode": "auto",
|
||||||
|
"orientation": "auto",
|
||||||
|
"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false},
|
||||||
|
"textMode": "auto"
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "sum(llama_stack_http_server_duration_milliseconds_count)", "refId": "A"}
|
||||||
|
],
|
||||||
|
"title": "Total Requests",
|
||||||
|
"type": "stat"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"mappings": [],
|
||||||
|
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": {"h": 8, "w": 6, "x": 18, "y": 8},
|
||||||
|
"id": 5,
|
||||||
|
"options": {
|
||||||
|
"colorMode": "value",
|
||||||
|
"graphMode": "none",
|
||||||
|
"justifyMode": "auto",
|
||||||
|
"orientation": "auto",
|
||||||
|
"reduceOptions": {"calcs": ["lastNotNull"], "fields": "", "values": false},
|
||||||
|
"textMode": "auto"
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "sum(llama_stack_http_server_active_requests)", "refId": "A"}
|
||||||
|
],
|
||||||
|
"title": "Active Requests",
|
||||||
|
"type": "stat"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10},
|
||||||
|
"mappings": [],
|
||||||
|
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||||
|
"unit": "reqps"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": {"h": 8, "w": 12, "x": 0, "y": 16},
|
||||||
|
"id": 6,
|
||||||
|
"options": {
|
||||||
|
"legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true},
|
||||||
|
"tooltip": {"mode": "multi", "sort": "none"}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "rate(llama_stack_http_server_duration_milliseconds_count[5m])", "legendFormat": "{{http_target}} - {{http_status_code}}", "refId": "A"}
|
||||||
|
],
|
||||||
|
"title": "Request Rate",
|
||||||
|
"type": "timeseries"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"},
|
||||||
|
"fieldConfig": {
|
||||||
|
"defaults": {
|
||||||
|
"custom": {"drawStyle": "line", "lineInterpolation": "linear", "showPoints": "auto", "fillOpacity": 10},
|
||||||
|
"mappings": [],
|
||||||
|
"thresholds": {"mode": "absolute", "steps": [{"color": "green", "value": null}]},
|
||||||
|
"unit": "Bps"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"gridPos": {"h": 8, "w": 12, "x": 12, "y": 16},
|
||||||
|
"id": 7,
|
||||||
|
"options": {
|
||||||
|
"legend": {"calcs": [], "displayMode": "list", "placement": "bottom", "showLegend": true},
|
||||||
|
"tooltip": {"mode": "multi", "sort": "none"}
|
||||||
|
},
|
||||||
|
"targets": [
|
||||||
|
{"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "rate(llama_stack_http_server_request_size_bytes_sum[5m])", "legendFormat": "Request", "refId": "A"},
|
||||||
|
{"datasource": {"type": "prometheus", "uid": "$(DS_PROMETHEUS}"}, "expr": "rate(llama_stack_http_server_response_size_bytes_sum[5m])", "legendFormat": "Response", "refId": "B"}
|
||||||
|
],
|
||||||
|
"title": "Request/Response Sizes",
|
||||||
|
"type": "timeseries"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"refresh": "5s",
|
||||||
|
"schemaVersion": 38,
|
||||||
|
"tags": ["llama-stack"],
|
||||||
|
"templating": {"list": []},
|
||||||
|
"time": {"from": "now-15m", "to": "now"},
|
||||||
|
"timepicker": {},
|
||||||
|
"timezone": "browser",
|
||||||
|
"title": "Llama Stack Metrics",
|
||||||
|
"uid": "llama-stack-metrics",
|
||||||
|
"version": 0,
|
||||||
|
"weekStart": ""
|
||||||
|
}
|
||||||
|
DASHBOARD_JSON
|
||||||
}
|
}
|
||||||
|
|
||||||
# Cleanup function to remove temporary files
|
# Cleanup function to remove temporary files
|
||||||
|
|
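The `uid: prometheus` addition above is what lets the provisioned dashboard find its datasource: every panel pins `"uid": "prometheus"`, so the datasource must be provisioned with that exact uid. A minimal consistency check, as a sketch (assumes PyYAML is available; file names match the assets materialized above):

import json

import yaml  # assumption: PyYAML is installed

def check_dashboard_datasource(dash_path: str, ds_path: str) -> None:
    with open(dash_path) as f:
        dashboard = json.load(f)
    with open(ds_path) as f:
        provisioned = {ds.get("uid") for ds in yaml.safe_load(f)["datasources"]}
    for panel in dashboard.get("panels", []):
        uid = (panel.get("datasource") or {}).get("uid")
        if uid not in provisioned:
            raise ValueError(f"panel {panel.get('id')} references unknown datasource uid {uid!r}")

check_dashboard_datasource("llama-stack-dashboard.json", "grafana-datasources.yaml")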
@@ -372,6 +593,8 @@ if [ "$WITH_TELEMETRY" = true ]; then
         -e GF_SECURITY_ADMIN_PASSWORD=admin \
         -e GF_USERS_ALLOW_SIGN_UP=false \
         -v "${TELEMETRY_ASSETS_DIR}/grafana-datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:Z" \
+        -v "${TELEMETRY_ASSETS_DIR}/grafana-dashboards.yaml:/etc/grafana/provisioning/dashboards/dashboards.yaml:Z" \
+        -v "${TELEMETRY_ASSETS_DIR}/llama-stack-dashboard.json:/etc/grafana/provisioning/dashboards/llama-stack-dashboard.json:Z" \
         docker.io/grafana/grafana:11.0.0 > /dev/null 2>&1; then
         die "Grafana startup failed"
     fi
@@ -208,6 +208,15 @@ if [[ "$STACK_CONFIG" == *"server:"* && "$COLLECT_ONLY" == false ]]; then
     echo "=== Starting Llama Stack Server ==="
     export LLAMA_STACK_LOG_WIDTH=120

+    # Configure telemetry collector for server mode
+    # Use a fixed port for the OTEL collector so the server can connect to it
+    COLLECTOR_PORT=4317
+    export LLAMA_STACK_TEST_COLLECTOR_PORT="${COLLECTOR_PORT}"
+    export OTEL_EXPORTER_OTLP_ENDPOINT="http://127.0.0.1:${COLLECTOR_PORT}"
+    export OTEL_EXPORTER_OTLP_PROTOCOL="http/protobuf"
+    export OTEL_BSP_SCHEDULE_DELAY="200"
+    export OTEL_BSP_EXPORT_TIMEOUT="2000"
+
     # remove "server:" from STACK_CONFIG
     stack_config=$(echo "$STACK_CONFIG" | sed 's/^server://')
     nohup llama stack run $stack_config > server.log 2>&1 &
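For context on the OTEL_* exports: the OpenTelemetry SDK reads these from the environment, so the server needs no code changes to point at the test collector, and the short batch delays flush test spans quickly instead of batching for seconds. A sketch of the consuming side (assumes the opentelemetry-sdk and opentelemetry-exporter-otlp-proto-http packages; not part of this diff):

from opentelemetry import trace
from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import BatchSpanProcessor

# Constructed with no arguments, the exporter picks up OTEL_EXPORTER_OTLP_ENDPOINT,
# and BatchSpanProcessor honors OTEL_BSP_SCHEDULE_DELAY / OTEL_BSP_EXPORT_TIMEOUT.
provider = TracerProvider()
provider.add_span_processor(BatchSpanProcessor(OTLPSpanExporter()))
trace.set_tracer_provider(provider)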
@@ -284,10 +293,15 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
     docker stop "$container_name" 2>/dev/null || true
     docker rm "$container_name" 2>/dev/null || true

+    # Configure telemetry collector port shared between host and container
+    COLLECTOR_PORT=4317
+    export LLAMA_STACK_TEST_COLLECTOR_PORT="${COLLECTOR_PORT}"
+
     # Build environment variables for docker run
     DOCKER_ENV_VARS=""
     DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_INFERENCE_MODE=$INFERENCE_MODE"
     DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e LLAMA_STACK_TEST_STACK_CONFIG_TYPE=server"
+    DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e OTEL_EXPORTER_OTLP_ENDPOINT=http://localhost:${COLLECTOR_PORT}"

     # Pass through API keys if they exist
     [ -n "${TOGETHER_API_KEY:-}" ] && DOCKER_ENV_VARS="$DOCKER_ENV_VARS -e TOGETHER_API_KEY=$TOGETHER_API_KEY"
@@ -308,8 +322,20 @@ if [[ "$STACK_CONFIG" == *"docker:"* && "$COLLECT_ONLY" == false ]]; then
     fi
     echo "Using image: $IMAGE_NAME"

-    docker run -d --network host --name "$container_name" \
-        -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \
+    # On macOS/Darwin, --network host doesn't work as expected due to Docker running in a VM
+    # Use regular port mapping instead
+    NETWORK_MODE=""
+    PORT_MAPPINGS=""
+    if [[ "$(uname)" != "Darwin" ]] && [[ "$(uname)" != *"MINGW"* ]]; then
+        NETWORK_MODE="--network host"
+    else
+        # On non-Linux (macOS, Windows), need explicit port mappings for both app and telemetry
+        PORT_MAPPINGS="-p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT -p $COLLECTOR_PORT:$COLLECTOR_PORT"
+        echo "Using bridge networking with port mapping (non-Linux)"
+    fi
+
+    docker run -d $NETWORK_MODE --name "$container_name" \
+        $PORT_MAPPINGS \
         $DOCKER_ENV_VARS \
         "$IMAGE_NAME" \
         --port $LLAMA_STACK_PORT
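The uname checks above gate host networking to Linux because Docker Desktop runs containers inside a VM, where `--network host` attaches to the VM rather than the host. The same decision expressed in Python, as a hedged sketch (the function name and the port values are illustrative only):

import platform

def docker_network_args(app_port: int, collector_port: int) -> list[str]:
    if platform.system() == "Linux":
        return ["--network", "host"]
    # macOS/Windows: publish the app port and the OTEL collector port explicitly
    return ["-p", f"{app_port}:{app_port}", "-p", f"{collector_port}:{collector_port}"]

print(docker_network_args(8321, 4317))  # illustrative ports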
@@ -6,7 +6,7 @@
 # the root directory of this source tree.

 set -e
-cd llama_stack/ui
+cd src/llama_stack/ui

 if [ ! -d node_modules ] || [ ! -x node_modules/.bin/prettier ] || [ ! -x node_modules/.bin/eslint ]; then
     echo "UI dependencies not installed, skipping prettier/linter check"
12
scripts/telemetry/grafana-dashboards.yaml
Normal file

@@ -0,0 +1,12 @@
+apiVersion: 1
+
+providers:
+  - name: 'Llama Stack'
+    orgId: 1
+    folder: ''
+    type: file
+    disableDeletion: false
+    updateIntervalSeconds: 10
+    allowUiUpdates: true
+    options:
+      path: /etc/grafana/provisioning/dashboards
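This provider file tells Grafana to load every dashboard JSON it finds under `options.path`, which is why the container invocations elsewhere in this diff mount `llama-stack-dashboard.json` into that same directory. A quick check of that invariant, sketched under the assumption that PyYAML is installed:

import yaml  # assumption: PyYAML is installed

with open("grafana-dashboards.yaml") as f:
    provider = yaml.safe_load(f)["providers"][0]

assert provider["type"] == "file"
assert provider["options"]["path"] == "/etc/grafana/provisioning/dashboards"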
@@ -5,6 +5,7 @@ datasources:
     type: prometheus
     access: proxy
     url: http://prometheus:9090
+    uid: prometheus
     isDefault: true
     editable: true
457
scripts/telemetry/llama-stack-dashboard.json
Normal file

@@ -0,0 +1,457 @@
+{
+  "annotations": {
+    "list": []
+  },
+  "editable": true,
+  "fiscalYearStartMonth": 0,
+  "graphTooltip": 0,
+  "id": null,
+  "links": [],
+  "liveNow": false,
+  "panels": [
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "custom": {
+            "drawStyle": "line",
+            "lineInterpolation": "linear",
+            "showPoints": "auto",
+            "fillOpacity": 10
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          }
+        }
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 0
+      },
+      "id": 1,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "none"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "prometheus"
+          },
+          "expr": "llama_stack_completion_tokens_total",
+          "legendFormat": "{{model_id}} ({{provider_id}})",
+          "refId": "A"
+        }
+      ],
+      "title": "Completion Tokens",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "custom": {
+            "drawStyle": "line",
+            "lineInterpolation": "linear",
+            "showPoints": "auto",
+            "fillOpacity": 10
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          }
+        }
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 0
+      },
+      "id": 2,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "table",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "none"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "prometheus"
+          },
+          "expr": "llama_stack_prompt_tokens_total",
+          "legendFormat": "Prompt - {{model_id}}",
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "prometheus"
+          },
+          "expr": "llama_stack_tokens_total",
+          "legendFormat": "Total - {{model_id}}",
+          "refId": "B"
+        }
+      ],
+      "title": "Prompt & Total Tokens",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "custom": {
+            "drawStyle": "line",
+            "lineInterpolation": "linear",
+            "showPoints": "auto",
+            "fillOpacity": 10
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "ms"
+        }
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 8
+      },
+      "id": 3,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "none"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "prometheus"
+          },
+          "expr": "histogram_quantile(0.95, rate(llama_stack_http_server_duration_milliseconds_bucket[5m]))",
+          "legendFormat": "p95",
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "prometheus"
+          },
+          "expr": "histogram_quantile(0.99, rate(llama_stack_http_server_duration_milliseconds_bucket[5m]))",
+          "legendFormat": "p99",
+          "refId": "B"
+        }
+      ],
+      "title": "HTTP Request Duration (p95, p99)",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          }
+        }
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 6,
+        "x": 12,
+        "y": 8
+      },
+      "id": 4,
+      "options": {
+        "colorMode": "value",
+        "graphMode": "area",
+        "justifyMode": "auto",
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "prometheus"
+          },
+          "expr": "sum(llama_stack_http_server_duration_milliseconds_count)",
+          "refId": "A"
+        }
+      ],
+      "title": "Total Requests",
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          }
+        }
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 6,
+        "x": 18,
+        "y": 8
+      },
+      "id": 5,
+      "options": {
+        "colorMode": "value",
+        "graphMode": "none",
+        "justifyMode": "auto",
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": [
+            "lastNotNull"
+          ],
+          "fields": "",
+          "values": false
+        },
+        "textMode": "auto"
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "prometheus"
+          },
+          "expr": "sum(llama_stack_http_server_active_requests)",
+          "refId": "A"
+        }
+      ],
+      "title": "Active Requests",
+      "type": "stat"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "custom": {
+            "drawStyle": "line",
+            "lineInterpolation": "linear",
+            "showPoints": "auto",
+            "fillOpacity": 10
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "reqps"
+        }
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 0,
+        "y": 16
+      },
+      "id": 6,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "none"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "prometheus"
+          },
+          "expr": "rate(llama_stack_http_server_duration_milliseconds_count[5m])",
+          "legendFormat": "{{http_target}} - {{http_status_code}}",
+          "refId": "A"
+        }
+      ],
+      "title": "Request Rate",
+      "type": "timeseries"
+    },
+    {
+      "datasource": {
+        "type": "prometheus",
+        "uid": "prometheus"
+      },
+      "fieldConfig": {
+        "defaults": {
+          "custom": {
+            "drawStyle": "line",
+            "lineInterpolation": "linear",
+            "showPoints": "auto",
+            "fillOpacity": 10
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "Bps"
+        }
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 12,
+        "x": 12,
+        "y": 16
+      },
+      "id": 7,
+      "options": {
+        "legend": {
+          "calcs": [],
+          "displayMode": "list",
+          "placement": "bottom",
+          "showLegend": true
+        },
+        "tooltip": {
+          "mode": "multi",
+          "sort": "none"
+        }
+      },
+      "targets": [
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "prometheus"
+          },
+          "expr": "rate(llama_stack_http_server_request_size_bytes_sum[5m])",
+          "legendFormat": "Request",
+          "refId": "A"
+        },
+        {
+          "datasource": {
+            "type": "prometheus",
+            "uid": "prometheus"
+          },
+          "expr": "rate(llama_stack_http_server_response_size_bytes_sum[5m])",
+          "legendFormat": "Response",
+          "refId": "B"
+        }
+      ],
+      "title": "Request/Response Sizes",
+      "type": "timeseries"
+    }
+  ],
+  "refresh": "5s",
+  "schemaVersion": 38,
+  "tags": [
+    "llama-stack"
+  ],
+  "templating": {
+    "list": []
+  },
+  "time": {
+    "from": "now-15m",
+    "to": "now"
+  },
+  "timepicker": {},
+  "timezone": "browser",
+  "title": "Llama Stack Metrics",
+  "uid": "llama-stack-metrics",
+  "version": 0,
+  "weekStart": ""
+}
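A note on the latency panels above: `histogram_quantile(0.95, rate(..._bucket[5m]))` estimates the p95 by linear interpolation inside the cumulative bucket that crosses the 95th-percentile rank. A self-contained sketch of that estimate (the bucket bounds are made up, not the real metric's):

def histogram_quantile(q: float, buckets: list[tuple[float, float]]) -> float:
    """buckets: (upper_bound, cumulative_count) pairs, sorted; last bound is +inf."""
    total = buckets[-1][1]
    rank = q * total
    prev_bound, prev_count = 0.0, 0.0
    for bound, count in buckets:
        if count >= rank:
            if bound == float("inf"):
                return prev_bound  # quantile falls in the overflow bucket
            return prev_bound + (bound - prev_bound) * (rank - prev_count) / (count - prev_count)
        prev_bound, prev_count = bound, count
    return prev_bound

# p95 of 100 requests spread over <=50ms, <=100ms, <=250ms, +inf buckets
print(histogram_quantile(0.95, [(50, 30), (100, 70), (250, 95), (float("inf"), 100)]))  # 250.0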
@@ -135,6 +135,8 @@ $CONTAINER_RUNTIME run -d --name grafana \
   -e GF_SECURITY_ADMIN_PASSWORD=admin \
   -e GF_USERS_ALLOW_SIGN_UP=false \
   -v "$SCRIPT_DIR/grafana-datasources.yaml:/etc/grafana/provisioning/datasources/datasources.yaml:Z" \
+  -v "$SCRIPT_DIR/grafana-dashboards.yaml:/etc/grafana/provisioning/dashboards/dashboards.yaml:Z" \
+  -v "$SCRIPT_DIR/llama-stack-dashboard.json:/etc/grafana/provisioning/dashboards/llama-stack-dashboard.json:Z" \
   docker.io/grafana/grafana:11.0.0

 # Wait for services to start
@@ -27,4 +27,4 @@ fi

 # Run unit tests with coverage
 uv run --python "$PYTHON_VERSION" --with-editable . --group unit \
-    coverage run --source=llama_stack -m pytest -s -v tests/unit/ "$@"
+    coverage run --source=src/llama_stack -m pytest -s -v tests/unit/ "$@"
@@ -38,6 +38,7 @@ from .openai_responses import (
     OpenAIResponseInputTool,
     OpenAIResponseObject,
     OpenAIResponseObjectStream,
+    OpenAIResponsePrompt,
     OpenAIResponseText,
 )
@@ -149,13 +150,13 @@ class ShieldCallStep(StepCommon):
 class MemoryRetrievalStep(StepCommon):
     """A memory retrieval step in an agent turn.

-    :param vector_db_ids: The IDs of the vector databases to retrieve context from.
+    :param vector_store_ids: The IDs of the vector databases to retrieve context from.
     :param inserted_context: The context retrieved from the vector databases.
     """

     step_type: Literal[StepType.memory_retrieval] = StepType.memory_retrieval
     # TODO: should this be List[str]?
-    vector_db_ids: str
+    vector_store_ids: str
     inserted_context: InterleavedContent
@@ -810,6 +811,7 @@ class Agents(Protocol):
         self,
         input: str | list[OpenAIResponseInput],
         model: str,
+        prompt: OpenAIResponsePrompt | None = None,
         instructions: str | None = None,
         previous_response_id: str | None = None,
         conversation: str | None = None,
@@ -831,6 +833,7 @@ class Agents(Protocol):

         :param input: Input message(s) to create the response.
         :param model: The underlying LLM used for completions.
+        :param prompt: (Optional) Prompt object with ID, version, and variables.
         :param previous_response_id: (Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses.
         :param conversation: (Optional) The ID of a conversation to add the response to. Must begin with 'conv_'. Input and output messages will be automatically added to the conversation.
         :param include: (Optional) Additional fields to include in the response.
@@ -4,9 +4,10 @@
 # This source code is licensed under the terms described in the LICENSE file in
 # the root directory of this source tree.

+from collections.abc import Sequence
 from typing import Annotated, Any, Literal

-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, model_validator
 from typing_extensions import TypedDict

 from llama_stack.apis.vector_io import SearchRankingOptions as FileSearchRankingOptions
@@ -46,23 +47,66 @@ class OpenAIResponseInputMessageContentImage(BaseModel):

     :param detail: Level of detail for image processing, can be "low", "high", or "auto"
     :param type: Content type identifier, always "input_image"
+    :param file_id: (Optional) The ID of the file to be sent to the model.
     :param image_url: (Optional) URL of the image content
     """

     detail: Literal["low"] | Literal["high"] | Literal["auto"] = "auto"
     type: Literal["input_image"] = "input_image"
-    # TODO: handle file_id
+    file_id: str | None = None
     image_url: str | None = None


-# TODO: handle file content types
+@json_schema_type
+class OpenAIResponseInputMessageContentFile(BaseModel):
+    """File content for input messages in OpenAI response format.
+
+    :param type: The type of the input item. Always `input_file`.
+    :param file_data: The data of the file to be sent to the model.
+    :param file_id: (Optional) The ID of the file to be sent to the model.
+    :param file_url: The URL of the file to be sent to the model.
+    :param filename: The name of the file to be sent to the model.
+    """
+
+    type: Literal["input_file"] = "input_file"
+    file_data: str | None = None
+    file_id: str | None = None
+    file_url: str | None = None
+    filename: str | None = None
+
+    @model_validator(mode="after")
+    def validate_file_source(self) -> "OpenAIResponseInputMessageContentFile":
+        if not any([self.file_data, self.file_id, self.file_url, self.filename]):
+            raise ValueError(
+                "At least one of 'file_data', 'file_id', 'file_url', or 'filename' must be provided for file content"
+            )
+        return self
+
+
 OpenAIResponseInputMessageContent = Annotated[
-    OpenAIResponseInputMessageContentText | OpenAIResponseInputMessageContentImage,
+    OpenAIResponseInputMessageContentText
+    | OpenAIResponseInputMessageContentImage
+    | OpenAIResponseInputMessageContentFile,
     Field(discriminator="type"),
 ]
 register_schema(OpenAIResponseInputMessageContent, name="OpenAIResponseInputMessageContent")


+@json_schema_type
+class OpenAIResponsePrompt(BaseModel):
+    """OpenAI compatible Prompt object that is used in OpenAI responses.
+
+    :param id: Unique identifier of the prompt template
+    :param variables: Dictionary of variable names to OpenAIResponseInputMessageContent structure for template substitution. The substitution values can either be strings, or other Response input types
+        like images or files.
+    :param version: Version number of the prompt to use (defaults to latest if not specified)
+    """
+
+    id: str
+    variables: dict[str, OpenAIResponseInputMessageContent] | None = None
+    version: str | None = None
+
+
 @json_schema_type
 class OpenAIResponseAnnotationFileCitation(BaseModel):
     """File citation annotation for referencing specific files in response content.
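A short usage sketch for the two additions above: the model_validator rejects a file part with no source, and a prompt carries per-call template variables. The module path follows this repo's layout; the `text` field of OpenAIResponseInputMessageContentText and the id values are assumptions for illustration, not from this diff:

from pydantic import ValidationError

from llama_stack.apis.agents.openai_responses import (
    OpenAIResponseInputMessageContentFile,
    OpenAIResponseInputMessageContentText,
    OpenAIResponsePrompt,
)

try:
    OpenAIResponseInputMessageContentFile()  # no file_data/file_id/file_url/filename
except ValidationError as e:
    print("rejected as expected:", e.errors()[0]["msg"])

file_part = OpenAIResponseInputMessageContentFile(file_id="file-abc123")  # illustrative id
prompt = OpenAIResponsePrompt(
    id="pmpt-weather-report",  # illustrative id
    version="2",
    variables={"city": OpenAIResponseInputMessageContentText(text="Paris")},
)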
@@ -159,7 +203,7 @@ class OpenAIResponseMessage(BaseModel):
     scenarios.
     """

-    content: str | list[OpenAIResponseInputMessageContent] | list[OpenAIResponseOutputMessageContent]
+    content: str | Sequence[OpenAIResponseInputMessageContent] | Sequence[OpenAIResponseOutputMessageContent]
     role: Literal["system"] | Literal["developer"] | Literal["user"] | Literal["assistant"]
     type: Literal["message"] = "message"
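The `list` to `Sequence` migration in this and the following hunks is about type-checker variance: `Sequence` is covariant and read-only, so callers can pass a list of a narrower member type without an error, while `list[...]` is invariant. A minimal illustration with stand-in types (Base and Sub are hypothetical):

from collections.abc import Sequence

class Base: ...
class Sub(Base): ...

def takes_seq(items: Sequence[Base]) -> int:
    return len(items)

def takes_list(items: list[Base]) -> int:
    return len(items)

subs: list[Sub] = [Sub(), Sub()]
takes_seq(subs)   # accepted: Sequence[Base] is covariant
takes_list(subs)  # mypy rejects this: list is invariant (it still runs at runtime)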
@@ -211,10 +255,10 @@ class OpenAIResponseOutputMessageFileSearchToolCall(BaseModel):
     """

     id: str
-    queries: list[str]
+    queries: Sequence[str]
     status: str
     type: Literal["file_search_call"] = "file_search_call"
-    results: list[OpenAIResponseOutputMessageFileSearchToolCallResults] | None = None
+    results: Sequence[OpenAIResponseOutputMessageFileSearchToolCallResults] | None = None


 @json_schema_type
@@ -538,6 +582,7 @@ class OpenAIResponseObject(BaseModel):
     :param output: List of generated output items (messages, tool calls, etc.)
     :param parallel_tool_calls: Whether tool calls can be executed in parallel
     :param previous_response_id: (Optional) ID of the previous response in a conversation
+    :param prompt: (Optional) Reference to a prompt template and its variables.
     :param status: Current status of the response generation
     :param temperature: (Optional) Sampling temperature used for generation
     :param text: Text formatting configuration for the response
@@ -553,16 +598,17 @@ class OpenAIResponseObject(BaseModel):
     id: str
     model: str
     object: Literal["response"] = "response"
-    output: list[OpenAIResponseOutput]
+    output: Sequence[OpenAIResponseOutput]
     parallel_tool_calls: bool = False
     previous_response_id: str | None = None
+    prompt: OpenAIResponsePrompt | None = None
     status: str
     temperature: float | None = None
     # Default to text format to avoid breaking the loading of old responses
     # before the field was added. New responses will have this set always.
     text: OpenAIResponseText = OpenAIResponseText(format=OpenAIResponseTextFormat(type="text"))
     top_p: float | None = None
-    tools: list[OpenAIResponseTool] | None = None
+    tools: Sequence[OpenAIResponseTool] | None = None
     truncation: str | None = None
     usage: OpenAIResponseUsage | None = None
     instructions: str | None = None
@@ -1254,14 +1300,9 @@ class OpenAIResponseInputFunctionToolCallOutput(BaseModel):

 OpenAIResponseInput = Annotated[
     # Responses API allows output messages to be passed in as input
-    OpenAIResponseOutputMessageWebSearchToolCall
-    | OpenAIResponseOutputMessageFileSearchToolCall
-    | OpenAIResponseOutputMessageFunctionToolCall
+    OpenAIResponseOutput
     | OpenAIResponseInputFunctionToolCallOutput
-    | OpenAIResponseMCPApprovalRequest
     | OpenAIResponseMCPApprovalResponse
-    | OpenAIResponseOutputMessageMCPCall
-    | OpenAIResponseOutputMessageMCPListTools
     | OpenAIResponseMessage,
     Field(union_mode="left_to_right"),
 ]
@@ -1275,7 +1316,7 @@ class ListOpenAIResponseInputItem(BaseModel):
     :param object: Object type identifier, always "list"
     """

-    data: list[OpenAIResponseInput]
+    data: Sequence[OpenAIResponseInput]
     object: Literal["list"] = "list"
@@ -1286,7 +1327,7 @@ class OpenAIResponseObjectWithInput(OpenAIResponseObject):
     :param input: List of input items that led to this response
     """

-    input: list[OpenAIResponseInput]
+    input: Sequence[OpenAIResponseInput]

     def to_response_object(self) -> OpenAIResponseObject:
         """Convert to OpenAIResponseObject by excluding input field."""
@@ -1304,7 +1345,7 @@ class ListOpenAIResponseObject(BaseModel):
     :param object: Object type identifier, always "list"
     """

-    data: list[OpenAIResponseObjectWithInput]
+    data: Sequence[OpenAIResponseObjectWithInput]
     has_more: bool
     first_id: str
     last_id: str
@@ -21,8 +21,8 @@ from typing_extensions import TypedDict
 from llama_stack.apis.common.content_types import ContentDelta, InterleavedContent
 from llama_stack.apis.common.responses import Order
 from llama_stack.apis.models import Model
-from llama_stack.apis.telemetry import MetricResponseMixin
 from llama_stack.apis.version import LLAMA_STACK_API_V1, LLAMA_STACK_API_V1ALPHA
+from llama_stack.core.telemetry.telemetry import MetricResponseMixin
 from llama_stack.core.telemetry.trace_protocol import trace_protocol
 from llama_stack.models.llama.datatypes import (
     BuiltinTool,
|
||||||
|
|
||||||
strategy: SamplingStrategy = Field(default_factory=GreedySamplingStrategy)
|
strategy: SamplingStrategy = Field(default_factory=GreedySamplingStrategy)
|
||||||
|
|
||||||
max_tokens: int | None = 0
|
max_tokens: int | None = None
|
||||||
repetition_penalty: float | None = 1.0
|
repetition_penalty: float | None = 1.0
|
||||||
stop: list[str] | None = None
|
stop: list[str] | None = None
|
||||||
|
|
||||||
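The default change from `0` to `None` matters because providers need to tell "caller did not set max_tokens" apart from an explicit limit, and that distinction was lost when unset fields defaulted to `0`. A hypothetical sketch of provider-side handling under the new default:

def build_request_body(max_tokens: int | None) -> dict:
    body: dict = {}
    if max_tokens is not None:  # only forward an explicit caller-provided limit
        body["max_tokens"] = max_tokens
    return body

assert build_request_body(None) == {}
assert build_request_body(128) == {"max_tokens": 128}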
Some files were not shown because too many files have changed in this diff.